Load Packages

# Install `remotes`
if (!require("remotes", quietly = TRUE)) {
  install.packages("remotes")
}

# Install devtools
if (!require("devtools", quietly = TRUE)) {
  install.packages("devtools")
}

# Install `librarian`
if (!require("librarian", quietly = TRUE)) {
  remotes::install_github("DesiQuintans/librarian")
}

# Install `splitTools`
if (!require("splitTools", quietly = TRUE)) {
  remotes::install_github("mayer79/splitTools")
}

# Install `BiocManager`
if (!require("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install `Biobase` to enable `librarian` to install missing packages
if (!require("Biobase", quietly = TRUE)) {
  BiocManager::install("Biobase")
}


# Load/Install Packages
shelf(
  sckott / cowsay,
  ggpubr,
  tidyverse,
  tableone,
  glmnet,
  rstatix,
  reticulate,
  caret,
  survival,
  survminer,
  cutpointr,
  shadowtext,
  pROC,
  gtsummary,
  gt,
  Maaslin2,
  vegan,
  pairwiseAdonis,
  umap,
  factoextra,
  EnhancedVolcano,
  ggpirate,
  splitTools,
  yingtools2,
  patchwork,
  janitor,
  labelled,
  conflicted,
  survRM2
)

# Package conflicts
{
  conflict_prefer("select", "dplyr")
  conflict_prefer("mutate", "dplyr")
  conflict_prefer("filter", "dplyr")
  conflict_prefer("rename", "dplyr")
  conflict_prefer("slice", "dplyr")
  conflict_prefer("between", "dplyr")
  conflict_prefer("annotate", "ggplot2")
  conflict_prefer("cummax", "yingtools2")
  conflict_prefer("simplify", "igraph")
  conflict_prefer("predict", "stats")
  conflict_prefer("splsda", "mixOmics")
  conflict_prefer("cbind", "base")
  conflict_prefer("make.names", "base")
  conflict_prefer("unique", "base")
  conflict_prefer("as.data.frame", "base")
  conflict_prefer("setdiff", "base")
  conflict_prefer("cutpoints", "cutpointr")
  conflict_prefer("chisq.test", "stats")
}


# color palette
devtools::source_url("https://github.com/yingeddi2008/DFIutility/blob/master/getRdpPal.R?raw=TRUE")

say(
  "Fun microbiome analysis is. Hmmm.",
  by = "yoda",
  what_color = "black",
  by_color = c("#2B7F29", "#6EBA35", "#2B7F29")
)
## 
##  ----- 
## Fun microbiome analysis is. Hmmm. 
##  ------ 
##     \   
##      \
##                    ____
##                 _.' :  `._
##             .-.'`.  ;   .'`.-.
##    __      / : ___\ ;  /___ ; \      __
##   ,'_ ""--.:__;".-.";: :".-.":__;.--"" _`,
##   :' `.t""--.. '<@.`;_  ',@>` ..--""j.' `;
##        `:-.._J '-.-'L__ `-- ' L_..-;'
##           "-.__ ;  .-"  "-.  : __.-"
##              L ' /.------.\ ' J
##              "-.   "--"   .-"
##              __.l"-:_JL_;-";.__
##          .-j/'.;  ;""""  / .'\"-.
##          .' /:`. "-.:     .-" .';  `.
##       .-"  / ;  "-. "-..-" .-"  :    "-.
##   .+"-.  : :      "-.__.-"      ;-._   \
##   ; \  `.; ;                    : : "+. ;
##   :  ;   ; ;                    : ;  : \:
##   ;  :   ; :                    ;:   ;  :
##   : \  ;  :  ;                  : ;  /  ::
##   ;  ; :   ; :                  ;   :   ;:
##   :  :  ;  :  ;                : :  ;  : ;
##   ;\    :   ; :                ; ;     ; ;
##   : `."-;   :  ;              :  ;    /  ;
##  ;    -:   ; :              ;  : .-"   :
##   :\     \  :  ;            : \.-"      :
##   ;`.    \  ; :            ;.'_..--  / ;
##   :  "-.  "-:  ;          :/."      .'  :
##    \         \ :          ;/  __        :
##     \       .-`.\        /t-""  ":-+.   :
##      `.  .-"    `l    __/ /`. :  ; ; \  ;
##        \   .-" .-"-.-"  .' .'j \  /   ;/
##         \ / .-"   /.     .'.' ;_:'    ;
##   :-""-.`./-.'     /    `.___.'
##                \ `t  ._  /  bug
##                 "-.t-._:'
## 
# ggplot theme shortcuts
et <- element_text
eb <- element_blank
er <- element_rect
el <- element_line

# Set path to python3
use_condaenv("/Users/nick/miniconda3/bin/python")

Load Data Image

# R image
load("./Data/MICU_Data_Anon.RData")

Load Data

# Load in sample lookup list 
first_samp_list_anon <- readRDS("./Data/first_samp_list_anon.rds")

# Load in clinical variables
micu_new_anon <- readRDS("./Data/micu_new_anon.rds") %>%
  mutate(
    thirtyday_mortality_overall = factor(
      thirtyday_mortality_overall,
      levels = c("Survivor", "Non-Survivor")
    ),
    sepsis.factor = case_when(sepsis.factor == "None" ~ "None",
                              TRUE ~ "Sepsis"),
    sepsis.factor = factor(sepsis.factor, levels = c("None", "Sepsis"))
  )

# Split data into stratified partitions
micu_index <-
  partition(
    y = first_samp_list_anon$thirtyday_mortality_overall,
    p = c(train = 0.75, test = 0.25),
    type = "stratified",
    seed = 543,
    shuffle = TRUE
  )

# Original cohort lookup
first_samp_list_oc <-
  first_samp_list_anon[micu_index$train, ]

# Validation cohort lookup
first_samp_list_vc <-
  first_samp_list_anon[micu_index$test, ]

# Original cohort clinical variables
micu_new_nocovid_oc <- micu_new_anon %>%
  right_join(first_samp_list_oc)

# Validation cohort clinical variables
micu_new_nocovid_vc <- micu_new_anon %>%
  right_join(first_samp_list_vc)

taxdmp <- readRDS("./Data/taxdmp.rds")

metaphlan <- readRDS("./Data/metaphlan.rds")

pal <- getRdpPal(metaphlan)

metab_quant_imp_tot_mM <- readRDS("./Data/metab_quant_imp_tot_mM.rds")

metab_qual_imp_tot <- readRDS("./Data/metab_qual_imp_tot.rds")

cri_rxmar_abx_long <- readRDS("./Data/cri_rxmar_abx_long.rds")

Custom Functions

# Need to increase Maaslin's color palette
maaslin2_association_plots <-
  function(metadata,
           features,
           output_results,
           write_to = "./",
           figures_folder = "./figures/",
           max_pngs = 10,
           save_scatter = FALSE) {
    if (is.character(metadata)) {
      metadata <- read.table(
        metadata,
        header = TRUE,
        row.names = 1,
        sep = "\t",
        fill = FALSE,
        comment.char = "",
        check.names = FALSE
      )
    }
    if (is.character(features)) {
      features <- read.table(
        features,
        header = TRUE,
        row.names = 1,
        sep = "\t",
        fill = FALSE,
        comment.char = "",
        check.names = FALSE
      )
    }
    common_rows <- intersect(rownames(features), rownames(metadata))
    input_df_all <- cbind(
      features[common_rows, , drop = FALSE],
      metadata[common_rows, , drop = FALSE]
    )
    if (is.character(output_results)) {
      output_df_all <- read.table(
        output_results,
        header = TRUE,
        row.names = NULL,
        sep = "\t",
        fill = FALSE,
        comment.char = "",
        check.names = FALSE
      )
    } else {
      output_df_all <- output_results
    }
    if (dim(output_df_all)[1] < 1) {
      print("There are no associations to plot!")
      return(NULL)
    }
    logging::loginfo(
      paste(
        "Plotting associations from most",
        "to least significant,",
        "grouped by metadata"
      )
    )
    metadata_types <- unlist(output_df_all[, "metadata"])
    metadata_labels <-
      unlist(metadata_types[!duplicated(metadata_types)])
    metadata_number <- 1
    saved_plots <- list()
    for (label in metadata_labels) {
      saved_plots[[label]] <- list()
      plot_file <- paste(write_to,
        "/",
        gsub(
          "[^[:alnum:]_]",
          "_", label
        ),
        ".pdf",
        sep = ""
      )
      data_index <- which(label == metadata_types)
      logging::loginfo(
        "Plotting data for metadata number %s, %s",
        metadata_number,
        label
      )
      pdf(plot_file,
        width = 2.65,
        height = 2.5,
        onefile = TRUE
      )
      x <- NULL
      y <- NULL
      count <- 1
      for (i in data_index) {
        x_label <- as.character(output_df_all[i, "metadata"])
        y_label <- as.character(output_df_all[i, "feature"])
        results_value <- as.character(output_df_all[i, "value"])
        qval <- as.numeric(output_df_all[i, "qval"])
        coef_val <- as.numeric(output_df_all[i, "coef"])
        input_df <- input_df_all[c(x_label, y_label)]
        colnames(input_df) <- c("x", "y")
        temp_plot <- NULL
        if (is.numeric(input_df[1, "x"]) &
          length(unique(input_df[["x"]])) >
            2) {
          logging::loginfo(
            "Creating scatter plot for continuous data, %s vs %s",
            x_label,
            y_label
          )
          temp_plot <- ggplot2::ggplot(
            data = input_df,
            ggplot2::aes(
              as.numeric(as.character(x)),
              as.numeric(as.character(y))
            )
          ) +
            ggplot2::geom_point(
              fill = "darkolivegreen4",
              color = "black",
              alpha = 0.5,
              shape = 21,
              size = 1,
              stroke = 0.15
            ) +
            ggplot2::scale_x_continuous(limits = c(
              min(input_df["x"]),
              max(input_df["x"])
            )) +
            ggplot2::scale_y_continuous(limits = c(
              min(input_df["y"]),
              max(input_df["y"])
            )) +
            ggplot2::stat_smooth(
              method = "glm",
              size = 0.5,
              color = "blue",
              na.rm = TRUE
            ) +
            ggplot2::guides(alpha = "none") +
            ggplot2::labs("") +
            ggplot2::xlab(x_label) +
            ggplot2::ylab(y_label) +
            nature_theme(input_df[, "x"], y_label) +
            ggplot2::annotate(
              geom = "text",
              x = Inf,
              y = Inf,
              hjust = 1,
              vjust = 1,
              label = sprintf(
                "FDR: %s\nCoefficient: %s\nN: %s",
                formatC(qval, format = "e", digits = 3),
                formatC(coef_val, format = "e", digits = 2),
                formatC(length(input_df[, "x"]))
              ),
              color = "black",
              size = 2,
              fontface = "italic"
            )
        } else {
          logging::loginfo(
            "Creating boxplot for categorical data, %s vs %s",
            x_label,
            y_label
          )
          input_df["x"] <- lapply(input_df["x"], as.character)
          x_axis_label_names <- unique(input_df[["x"]])
          renamed_levels <- as.character(levels(metadata[
            ,
            x_label
          ]))
          if (length(renamed_levels) == 0) {
            renamed_levels <- x_axis_label_names
          }
          for (name in x_axis_label_names) {
            total <- length(which(input_df[["x"]] == name))
            new_n <- paste(name, " (n=", total, ")", sep = "")
            input_df[which(input_df[["x"]] == name), "x"] <- new_n
            renamed_levels <- replace(
              renamed_levels,
              renamed_levels == name, new_n
            )
          }
          input_df$xnames <-
            factor(input_df[["x"]], levels = renamed_levels)
          temp_plot <- ggplot2::ggplot(
            data = input_df,
            ggplot2::aes(xnames, y)
          ) +
            ggplot2::geom_boxplot(
              ggplot2::aes(fill = x),
              outlier.alpha = 0,
              na.rm = TRUE,
              alpha = 0.5,
              show.legend = FALSE
            ) +
            ggplot2::geom_point(
              ggplot2::aes(fill = x),
              alpha = 0.75,
              size = 1,
              shape = 21,
              stroke = 0.15,
              color = "black",
              position = ggplot2::position_jitterdodge()
            ) +
            paletteer::scale_fill_paletteer_d(palette = "khroma::smoothrainbow")
          temp_plot <- temp_plot + nature_theme(input_df[
            ,
            "x"
          ], y_label) + ggplot2::theme(
            panel.grid.major = ggplot2::element_blank(),
            panel.grid.minor = ggplot2::element_blank(),
            panel.background = ggplot2::element_blank(),
            axis.line = ggplot2::element_line(colour = "black")
          ) +
            ggplot2::xlab(x_label) + ggplot2::ylab(y_label) +
            ggplot2::theme(legend.position = "none") +
            ggplot2::annotate(
              geom = "text",
              x = Inf,
              y = Inf,
              hjust = 1,
              vjust = 1,
              label = sprintf(
                "FDR: %s\nCoefficient: %s\nValue: %s",
                formatC(qval, format = "e", digits = 3),
                formatC(coef_val, format = "e", digits = 2),
                results_value
              ),
              color = "black",
              size = 2,
              fontface = "italic"
            )
        }
        stdout <- capture.output(print(temp_plot), type = "message")
        if (length(stdout) > 0) {
          logging::logdebug(stdout)
        }
        if (save_scatter) {
          saved_plots[[label]][[count]] <- temp_plot
        } else if (count <= max_pngs) {
          saved_plots[[label]][[count]] <- temp_plot
        }
        count <- count + 1
      }
      invisible(dev.off())
      for (plot_number in seq(1, min((count - 1), max_pngs))) {
        png_file <-
          file.path(
            figures_folder,
            paste0(
              substr(
                basename(plot_file),
                1, nchar(basename(plot_file)) - 4
              ), "_", plot_number,
              ".png"
            )
          )
        png(png_file,
          res = 300,
          width = 960,
          height = 960
        )
        stdout <-
          capture.output(print(saved_plots[[label]][[plot_number]]))
        invisible(dev.off())
      }
      if (save_scatter) {
        names(saved_plots[[label]]) <- make.names(output_df_all[
          data_index,
          "feature"
        ], unique = TRUE)
      } else {
        saved_plots[[label]] <- NULL
      }
      metadata_number <- metadata_number + 1
    }
    return(saved_plots)
  }

`%!in%` <- negate(`%in%`)

Training Cohort

Training Cohort Analysis

Univariate Clinical Statistics

# Build tableone for original cohort
tableone_nocovid_df <-
  micu_new_nocovid_oc %>%
  left_join(cri_rxmar_abx_long, by = "unique_id") %>%
  mutate(across(Cephalosporins:Doxycycline, ~ str_to_title(.))) %>% 
  mutate(across(Cephalosporins:Doxycycline, ~ replace_na(., "Unchecked"))) %>%
  mutate(across(Cephalosporins:Doxycycline, ~ as.factor(.))) %>% 
  mutate(across(Cephalosporins:Doxycycline, ~ factor(., levels = c("Unchecked", "Checked")))) %>% 
  mutate(across(Hypertension:Tuberculosis, ~ factor(., levels = c("Unchecked", "Checked")))) %>% 
  mutate(across(Acute.respiratory.distress.syndrome:Newly.diagnosed.solid.malignancy, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
  mutate(across(Myocardial.infract:AIDS, ~ factor(., levels = c("Unchecked", "Checked")))) %>% 
  select(
    age,
    sex.factor,
    bmi,
    race.factor,
    cci_total_sc,
    thirtyday_mortality_overall,
    primary_dx.factor,
    ards.factor,
    sepsis.factor,
    admit_from.factor,
    COVID_upon_admission,
    sofa_score_total,
    ap2_total_score,
    reason_for_intubation.factor,
    reintub_1.factor,
    reintub_2.factor,
    total_ventilator_days,
    icu_los_total,
    hospital_los,
    day_collected,
    Hypertension:`Neuromuscular.disorder`,
    `Peptic.ulcer.disease`,
    `Thyroid.disease`:Tuberculosis,
    `Bacterial.pneumonia`:`Newly.diagnosed.solid.malignancy`,
    `Myocardial.infract`:`AIDS`,
    Penicillins,
    Cephalosporins,
    Carbapenems,
    Vancomycin,
    Metronidazole,
    Macrolides,
    Quinolones,
    other,
    Clindamycin,
    Aminoglycosides,
    Doxycycline,
    `Trimethoprim-Sulfamethoxazole`,
    Rifaximin,
    `diet`,
    dSOFA_admission, 
    dSOFA_stool
  ) %>%
  janitor::clean_names() %>%
  select(-c(
    hypertension:tuberculosis,
    reason_for_intubation_factor:hospital_los
  )) %>%
  replace_na(list(reason_for_intubation_factor = "Not intubated")) %>%
  droplevels()

tableone_nocovid <- CreateTableOne(
  data = tableone_nocovid_df,
  strata = "thirtyday_mortality_overall",
  includeNA = TRUE,
  
)
summary(tableone_nocovid)
## 
##      ### Summary of continuous variables ###
## 
## thirtyday_mortality_overall: Survivor
##                    n miss p.miss mean sd median p25 p75 min max skew kurt
## age              102    0      0   60 18     64  50  70  19  97 -0.6 -0.2
## bmi              102    1      1   28  9     26  22  32  12  63  1.3  2.1
## cci_total_sc     102    0      0    5  3      4   3   7   0  12  0.4 -0.2
## sofa_score_total 102    0      0    8  4      7   4  11   1  17  0.2 -1.0
## ap2_total_score  102    0      0   24  8     24  19  29   7  45  0.3 -0.1
## day_collected    102    0      0    3  3      3   2   4   0  23  3.7 21.9
## d_sofa_admission 102    1      1    1  3      1   0   3 -12   8 -0.9  3.4
## d_sofa_stool     102    6      6    1  2      1   0   2  -6   8  0.3  1.9
## ------------------------------------------------------------ 
## thirtyday_mortality_overall: Non-Survivor
##                   n miss p.miss mean sd median p25 p75 min max   skew  kurt
## age              45    0      0 62.1 13     64  54  69  29  89 -0.251 -0.09
## bmi              45    0      0 26.8  9     24  21  28  14  54  1.536  2.01
## cci_total_sc     45    0      0  5.5  3      5   4   7   0  12  0.468 -0.02
## sofa_score_total 45    0      0 10.2  4     10   6  14   2  22  0.435 -0.09
## ap2_total_score  45    0      0 27.2  6     27  23  32  14  41  0.116 -0.21
## day_collected    45    0      0  3.8  4      2   2   5   0  15  1.496  1.57
## d_sofa_admission 45    1      2 -0.2  3      0  -2   2  -7   6 -0.277 -0.75
## d_sofa_stool     45    5     11 -0.1  3      0  -2   2  -6   7  0.001 -0.05
## 
## p-values
##                      pNormal  pNonNormal
## age              0.391166213 0.755680780
## bmi              0.509029953 0.325493821
## cci_total_sc     0.116532302 0.183194531
## sofa_score_total 0.000528178 0.001227156
## ap2_total_score  0.014515939 0.007974396
## day_collected    0.273124828 0.776545378
## d_sofa_admission 0.019058036 0.033241089
## d_sofa_stool     0.006469348 0.015657115
## 
## Standardize mean differences
##                     1 vs 2
## age              0.1623180
## bmi              0.1190168
## cci_total_sc     0.2770153
## sofa_score_total 0.6228183
## ap2_total_score  0.4638797
## day_collected    0.1887799
## d_sofa_admission 0.4176105
## d_sofa_stool     0.4804820
## 
## =======================================================================================
## 
##      ### Summary of categorical variables ### 
## 
## thirtyday_mortality_overall: Survivor
##                                            var   n miss p.miss
##                                     sex_factor 102    0    0.0
##                                                               
##                                                               
##                                    race_factor 102    0    0.0
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                    thirtyday_mortality_overall 102    0    0.0
##                                                               
##                                                               
##                              primary_dx_factor 102    0    0.0
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                    ards_factor 102    0    0.0
##                                                               
##                                                               
##                                  sepsis_factor 102    0    0.0
##                                                               
##                                                               
##                              admit_from_factor 102    0    0.0
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                                                               
##                           covid_upon_admission 102    0    0.0
##                                                               
##                            bacterial_pneumonia 102    0    0.0
##                                                               
##                                                               
##                               fungal_pneumonia 102    0    0.0
##                                                               
##                                                               
##                                viral_pneumonia 102    0    0.0
##                                                               
##                                                               
##   chronic_obstructive_pulmonary_disease_copd_1 102    0    0.0
##                                                               
##                                                               
##                            asthma_exacerbation 102    0    0.0
##                                                               
##                                                               
##                            lung_lobar_collapse 102    0    0.0
##                                                               
##                             pulmonary_embolism 102    0    0.0
##                                                               
##                                                               
##                                     hemoptysis 102    0    0.0
##                                                               
##                                                               
##                                   pancreatitis 102    0    0.0
##                                                               
##                                                               
##                 infection_genitourinary_system 102    0    0.0
##                                                               
##                                                               
##                      infection_intra_abdominal 102    0    0.0
##                                                               
##                                                               
##                          infection_soft_tissue 102    0    0.0
##                                                               
##                                                               
##                                  infection_cns 102    0    0.0
##                                                               
##                                                               
##               hepatic_failure_acute_fullminant 102    0    0.0
##                                                               
##                                                               
##               hepatic_failure_acute_on_chronic 102    0    0.0
##                                                               
##                                                               
##                          diabetic_ketoacidosis 102    0    0.0
##                                                               
##                                                               
##                                 acute_leukemia 102    0    0.0
##                                                               
##                                                               
##                   cerebral_vascular_accident_1 102    0    0.0
##                                                               
##                                                               
##       acute_myocardial_infarction_nstemi_stemi 102    0    0.0
##                                                               
##                                                               
##                    diffuse_alveolar_hemorrhage 102    0    0.0
##                                                               
##                                                               
##   decompensated_heart_failure_pulmonary_oedema 102    0    0.0
##                                                               
##                                                               
##                               pleural_effusion 102    0    0.0
##                                                               
##                                                               
##         interstitial_lung_disease_exacerbation 102    0    0.0
##                                                               
##                                                               
##                           organizing_pneumonia 102    0    0.0
##                                                               
##                  acute_eosinophilic_pneumoniae 102    0    0.0
##                                                               
##                                          other 102    0    0.0
##                                                               
##                                                               
##                                     angioedema 102    0    0.0
##                                                               
##                                                               
##                            acute_renal_failure 102    0    0.0
##                                                               
##                                                               
##                          altered_mental_status 102    0    0.0
##                                                               
##                                                               
##                           hypertensive_urgency 102    0    0.0
##                                                               
##                                                               
##                         hypertensive_emergency 102    0    0.0
##                                                               
##                                                               
##                                   endocarditis 102    0    0.0
##                                                               
##                                                               
##                                     bacteremia 102    0    0.0
##                                                               
##                                                               
##                      gastrointestinal_bleeding 102    0    0.0
##                                                               
##                                                               
##                              hemorrhagic_shock 102    0    0.0
##                                                               
##                                                               
##                                     aspiration 102    0    0.0
##                                                               
##                                                               
##  central_line_associated_blood_steam_infection 102    0    0.0
##                                                               
##                                                               
##                     prosthetic_joint_infection 102    0    0.0
##                                                               
##                                                               
##                  new_onset_atrial_fibrillation 102    0    0.0
##                                                               
##                                                               
##               newly_diagnosed_solid_malignancy 102    0    0.0
##                                                               
##                                                               
##                             myocardial_infract 102    0    0.0
##                                                               
##                                                               
##                       congestive_heart_failure 102    0    0.0
##                                                               
##                                                               
##                peripheral_vascular_disease_cci 102    0    0.0
##                                                               
##                                                               
##                        cerebrovascular_disease 102    0    0.0
##                                                               
##                                                               
##                                       dementia 102    0    0.0
##                                                               
##                                                               
##                      chronic_pulmonary_disease 102    0    0.0
##                                                               
##                                                               
##                    connective_tissue_disease_1 102    0    0.0
##                                                               
##                                                               
##                                  ulcer_disease 102    0    0.0
##                                                               
##                                                               
##                             mild_liver_disease 102    0    0.0
##                                                               
##                                                               
##                 diabetes_without_complications 102    0    0.0
##                                                               
##                                                               
##                 diabetes_with_end_organ_damage 102    0    0.0
##                                                               
##                                                               
##                                     hemiplegia 102    0    0.0
##                                                               
##                                                               
##               moderate_or_severe_renal_disease 102    0    0.0
##                                                               
##                                                               
##                     solid_tumor_non_metastatic 102    0    0.0
##                                                               
##                                                               
##                                       leukemia 102    0    0.0
##                                                               
##                                                               
##                                        lymhoma 102    0    0.0
##                                                               
##                                                               
##               moderate_or_severe_liver_disease 102    0    0.0
##                                                               
##                                                               
##                         metastatic_solid_tumor 102    0    0.0
##                                                               
##                                                               
##                                           aids 102    0    0.0
##                                                               
##                                                               
##                                    penicillins 102    0    0.0
##                                                               
##                                                               
##                                 cephalosporins 102    0    0.0
##                                                               
##                                                               
##                                    carbapenems 102    0    0.0
##                                                               
##                                                               
##                                     vancomycin 102    0    0.0
##                                                               
##                                                               
##                                  metronidazole 102    0    0.0
##                                                               
##                                                               
##                                     macrolides 102    0    0.0
##                                                               
##                                                               
##                                     quinolones 102    0    0.0
##                                                               
##                                                               
##                                        other_2 102    0    0.0
##                                                               
##                                                               
##                                    clindamycin 102    0    0.0
##                                                               
##                                                               
##                                aminoglycosides 102    0    0.0
##                                                               
##                                                               
##                                    doxycycline 102    0    0.0
##                                                               
##                                                               
##                  trimethoprim_sulfamethoxazole 102    0    0.0
##                                                               
##                                                               
##                                      rifaximin 102    0    0.0
##                                                               
##                                                               
##                                           diet 102    0    0.0
##                                                               
##                                                               
##                                   level freq percent cum.percent
##                                  Female   48    47.1        47.1
##                                    Male   54    52.9       100.0
##                                                                 
##                        African American   72    70.6        70.6
##                                   Asian    1     1.0        71.6
##                      More than one race    2     2.0        73.5
##                         White, Hispanic    3     2.9        76.5
##                     White, non-Hispanic   24    23.5       100.0
##                                    <NA>    0     0.0       100.0
##                                                                 
##                                Survivor  102   100.0       100.0
##                            Non-Survivor    0     0.0       100.0
##                                                                 
##        Acute (on chronic) liver failure    4     3.9         3.9
##                         AMI/dysrhythmia    2     2.0         5.9
##                   CHF/cardiogenic shock    3     2.9         8.8
##                           CNS pathology    2     2.0        10.8
##                           GI hemorrhage    7     6.9        17.6
##                               Metabolic    3     2.9        20.6
##                                   Other    4     3.9        24.5
##              Post-operative observation    3     2.9        27.5
##               Respiratory failure, AHRF   28    27.5        54.9
##  Respiratory failure, airway compromise   10     9.8        64.7
##        Respiratory failure, ventilatory    9     8.8        73.5
##               Sepsis (+/- septic shock)   27    26.5       100.0
##                                                                 
##                                      No   85    83.3        83.3
##                                     Yes   17    16.7       100.0
##                                                                 
##                                    None   35    34.3        34.3
##                                  Sepsis   67    65.7       100.0
##                                                                 
##                              Cardiology    2     2.0         2.0
##                                      ED   51    50.0        52.0
##                        General Medicine   16    15.7        67.6
##                                   Liver    3     2.9        70.6
##                               Neurology    4     3.9        74.5
##                                Oncology    5     4.9        79.4
##                                     OSH   12    11.8        91.2
##                                 Surgery    8     7.8        99.0
##                                    <NA>    1     1.0       100.0
##                                                                 
##                                      No  102   100.0       100.0
##                                                                 
##                               Unchecked   74    72.5        72.5
##                                 Checked   28    27.5       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked   96    94.1        94.1
##                                 Checked    6     5.9       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked  102   100.0       100.0
##                                                                 
##                               Unchecked   97    95.1        95.1
##                                 Checked    5     4.9       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked   87    85.3        85.3
##                                 Checked   15    14.7       100.0
##                                                                 
##                               Unchecked   92    90.2        90.2
##                                 Checked   10     9.8       100.0
##                                                                 
##                               Unchecked   95    93.1        93.1
##                                 Checked    7     6.9       100.0
##                                                                 
##                               Unchecked   99    97.1        97.1
##                                 Checked    3     2.9       100.0
##                                                                 
##                               Unchecked  102   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   96    94.1        94.1
##                                 Checked    6     5.9       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked  102   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   98    96.1        96.1
##                                 Checked    4     3.9       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked   85    83.3        83.3
##                                 Checked   17    16.7       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked  102   100.0       100.0
##                                                                 
##                               Unchecked  102   100.0       100.0
##                                                                 
##                               Unchecked   87    85.3        85.3
##                                 Checked   15    14.7       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked   68    66.7        66.7
##                                 Checked   34    33.3       100.0
##                                                                 
##                               Unchecked   79    77.5        77.5
##                                 Checked   23    22.5       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked   99    97.1        97.1
##                                 Checked    3     2.9       100.0
##                                                                 
##                               Unchecked   97    95.1        95.1
##                                 Checked    5     4.9       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked  102   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   95    93.1        93.1
##                                 Checked    7     6.9       100.0
##                                                                 
##                               Unchecked   99    97.1        97.1
##                                 Checked    3     2.9       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked   99    97.1        97.1
##                                 Checked    3     2.9       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked   96    94.1        94.1
##                                 Checked    6     5.9       100.0
##                                                                 
##                               Unchecked   79    77.5        77.5
##                                 Checked   23    22.5       100.0
##                                                                 
##                               Unchecked  101    99.0        99.0
##                                 Checked    1     1.0       100.0
##                                                                 
##                               Unchecked   85    83.3        83.3
##                                 Checked   17    16.7       100.0
##                                                                 
##                               Unchecked   98    96.1        96.1
##                                 Checked    4     3.9       100.0
##                                                                 
##                               Unchecked   68    66.7        66.7
##                                 Checked   34    33.3       100.0
##                                                                 
##                               Unchecked   94    92.2        92.2
##                                 Checked    8     7.8       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked   90    88.2        88.2
##                                 Checked   12    11.8       100.0
##                                                                 
##                               Unchecked   89    87.3        87.3
##                                 Checked   13    12.7       100.0
##                                                                 
##                               Unchecked   95    93.1        93.1
##                                 Checked    7     6.9       100.0
##                                                                 
##                               Unchecked   88    86.3        86.3
##                                 Checked   14    13.7       100.0
##                                                                 
##                               Unchecked   83    81.4        81.4
##                                 Checked   19    18.6       100.0
##                                                                 
##                               Unchecked   98    96.1        96.1
##                                 Checked    4     3.9       100.0
##                                                                 
##                               Unchecked   97    95.1        95.1
##                                 Checked    5     4.9       100.0
##                                                                 
##                               Unchecked   92    90.2        90.2
##                                 Checked   10     9.8       100.0
##                                                                 
##                               Unchecked   98    96.1        96.1
##                                 Checked    4     3.9       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked   86    84.3        84.3
##                                 Checked   16    15.7       100.0
##                                                                 
##                               Unchecked   34    33.3        33.3
##                                 Checked   68    66.7       100.0
##                                                                 
##                               Unchecked   97    95.1        95.1
##                                 Checked    5     4.9       100.0
##                                                                 
##                               Unchecked   46    45.1        45.1
##                                 Checked   56    54.9       100.0
##                                                                 
##                               Unchecked   64    62.7        62.7
##                                 Checked   38    37.3       100.0
##                                                                 
##                               Unchecked   82    80.4        80.4
##                                 Checked   20    19.6       100.0
##                                                                 
##                               Unchecked   96    94.1        94.1
##                                 Checked    6     5.9       100.0
##                                                                 
##                               Unchecked   94    92.2        92.2
##                                 Checked    8     7.8       100.0
##                                                                 
##                               Unchecked  100    98.0        98.0
##                                 Checked    2     2.0       100.0
##                                                                 
##                               Unchecked   91    89.2        89.2
##                                 Checked   11    10.8       100.0
##                                                                 
##                               Unchecked   97    95.1        95.1
##                                 Checked    5     4.9       100.0
##                                                                 
##                               Unchecked   91    89.2        89.2
##                                 Checked   11    10.8       100.0
##                                                                 
##                               Unchecked   97    95.1        95.1
##                                 Checked    5     4.9       100.0
##                                                                 
##                                    diet   75    73.5        73.5
##                                     npo   27    26.5       100.0
##                                                                 
## ------------------------------------------------------------ 
## thirtyday_mortality_overall: Non-Survivor
##                                            var  n miss p.miss
##                                     sex_factor 45    0    0.0
##                                                              
##                                                              
##                                    race_factor 45    0    0.0
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                    thirtyday_mortality_overall 45    0    0.0
##                                                              
##                                                              
##                              primary_dx_factor 45    0    0.0
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                    ards_factor 45    0    0.0
##                                                              
##                                                              
##                                  sepsis_factor 45    0    0.0
##                                                              
##                                                              
##                              admit_from_factor 45    0    0.0
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                           covid_upon_admission 45    0    0.0
##                                                              
##                            bacterial_pneumonia 45    0    0.0
##                                                              
##                                                              
##                               fungal_pneumonia 45    0    0.0
##                                                              
##                                                              
##                                viral_pneumonia 45    0    0.0
##                                                              
##                                                              
##   chronic_obstructive_pulmonary_disease_copd_1 45    0    0.0
##                                                              
##                                                              
##                            asthma_exacerbation 45    0    0.0
##                                                              
##                                                              
##                            lung_lobar_collapse 45    0    0.0
##                                                              
##                             pulmonary_embolism 45    0    0.0
##                                                              
##                                                              
##                                     hemoptysis 45    0    0.0
##                                                              
##                                                              
##                                   pancreatitis 45    0    0.0
##                                                              
##                                                              
##                 infection_genitourinary_system 45    0    0.0
##                                                              
##                                                              
##                      infection_intra_abdominal 45    0    0.0
##                                                              
##                                                              
##                          infection_soft_tissue 45    0    0.0
##                                                              
##                                                              
##                                  infection_cns 45    0    0.0
##                                                              
##                                                              
##               hepatic_failure_acute_fullminant 45    0    0.0
##                                                              
##                                                              
##               hepatic_failure_acute_on_chronic 45    0    0.0
##                                                              
##                                                              
##                          diabetic_ketoacidosis 45    0    0.0
##                                                              
##                                                              
##                                 acute_leukemia 45    0    0.0
##                                                              
##                                                              
##                   cerebral_vascular_accident_1 45    0    0.0
##                                                              
##                                                              
##       acute_myocardial_infarction_nstemi_stemi 45    0    0.0
##                                                              
##                                                              
##                    diffuse_alveolar_hemorrhage 45    0    0.0
##                                                              
##                                                              
##   decompensated_heart_failure_pulmonary_oedema 45    0    0.0
##                                                              
##                                                              
##                               pleural_effusion 45    0    0.0
##                                                              
##                                                              
##         interstitial_lung_disease_exacerbation 45    0    0.0
##                                                              
##                                                              
##                           organizing_pneumonia 45    0    0.0
##                                                              
##                  acute_eosinophilic_pneumoniae 45    0    0.0
##                                                              
##                                          other 45    0    0.0
##                                                              
##                                                              
##                                     angioedema 45    0    0.0
##                                                              
##                                                              
##                            acute_renal_failure 45    0    0.0
##                                                              
##                                                              
##                          altered_mental_status 45    0    0.0
##                                                              
##                                                              
##                           hypertensive_urgency 45    0    0.0
##                                                              
##                                                              
##                         hypertensive_emergency 45    0    0.0
##                                                              
##                                                              
##                                   endocarditis 45    0    0.0
##                                                              
##                                                              
##                                     bacteremia 45    0    0.0
##                                                              
##                                                              
##                      gastrointestinal_bleeding 45    0    0.0
##                                                              
##                                                              
##                              hemorrhagic_shock 45    0    0.0
##                                                              
##                                                              
##                                     aspiration 45    0    0.0
##                                                              
##                                                              
##  central_line_associated_blood_steam_infection 45    0    0.0
##                                                              
##                                                              
##                     prosthetic_joint_infection 45    0    0.0
##                                                              
##                                                              
##                  new_onset_atrial_fibrillation 45    0    0.0
##                                                              
##                                                              
##               newly_diagnosed_solid_malignancy 45    0    0.0
##                                                              
##                                                              
##                             myocardial_infract 45    0    0.0
##                                                              
##                                                              
##                       congestive_heart_failure 45    0    0.0
##                                                              
##                                                              
##                peripheral_vascular_disease_cci 45    0    0.0
##                                                              
##                                                              
##                        cerebrovascular_disease 45    0    0.0
##                                                              
##                                                              
##                                       dementia 45    0    0.0
##                                                              
##                                                              
##                      chronic_pulmonary_disease 45    0    0.0
##                                                              
##                                                              
##                    connective_tissue_disease_1 45    0    0.0
##                                                              
##                                                              
##                                  ulcer_disease 45    0    0.0
##                                                              
##                                                              
##                             mild_liver_disease 45    0    0.0
##                                                              
##                                                              
##                 diabetes_without_complications 45    0    0.0
##                                                              
##                                                              
##                 diabetes_with_end_organ_damage 45    0    0.0
##                                                              
##                                                              
##                                     hemiplegia 45    0    0.0
##                                                              
##                                                              
##               moderate_or_severe_renal_disease 45    0    0.0
##                                                              
##                                                              
##                     solid_tumor_non_metastatic 45    0    0.0
##                                                              
##                                                              
##                                       leukemia 45    0    0.0
##                                                              
##                                                              
##                                        lymhoma 45    0    0.0
##                                                              
##                                                              
##               moderate_or_severe_liver_disease 45    0    0.0
##                                                              
##                                                              
##                         metastatic_solid_tumor 45    0    0.0
##                                                              
##                                                              
##                                           aids 45    0    0.0
##                                                              
##                                                              
##                                    penicillins 45    0    0.0
##                                                              
##                                                              
##                                 cephalosporins 45    0    0.0
##                                                              
##                                                              
##                                    carbapenems 45    0    0.0
##                                                              
##                                                              
##                                     vancomycin 45    0    0.0
##                                                              
##                                                              
##                                  metronidazole 45    0    0.0
##                                                              
##                                                              
##                                     macrolides 45    0    0.0
##                                                              
##                                                              
##                                     quinolones 45    0    0.0
##                                                              
##                                                              
##                                        other_2 45    0    0.0
##                                                              
##                                                              
##                                    clindamycin 45    0    0.0
##                                                              
##                                                              
##                                aminoglycosides 45    0    0.0
##                                                              
##                                                              
##                                    doxycycline 45    0    0.0
##                                                              
##                                                              
##                  trimethoprim_sulfamethoxazole 45    0    0.0
##                                                              
##                                                              
##                                      rifaximin 45    0    0.0
##                                                              
##                                                              
##                                           diet 45    0    0.0
##                                                              
##                                                              
##                                   level freq percent cum.percent
##                                  Female   18    40.0        40.0
##                                    Male   27    60.0       100.0
##                                                                 
##                        African American   21    46.7        46.7
##                                   Asian    0     0.0        46.7
##                      More than one race    3     6.7        53.3
##                         White, Hispanic    0     0.0        53.3
##                     White, non-Hispanic   15    33.3        86.7
##                                    <NA>    6    13.3       100.0
##                                                                 
##                                Survivor    0     0.0         0.0
##                            Non-Survivor   45   100.0       100.0
##                                                                 
##        Acute (on chronic) liver failure    9    20.0        20.0
##                         AMI/dysrhythmia    0     0.0        20.0
##                   CHF/cardiogenic shock    0     0.0        20.0
##                           CNS pathology    0     0.0        20.0
##                           GI hemorrhage    2     4.4        24.4
##                               Metabolic    1     2.2        26.7
##                                   Other    1     2.2        28.9
##              Post-operative observation    1     2.2        31.1
##               Respiratory failure, AHRF   13    28.9        60.0
##  Respiratory failure, airway compromise    1     2.2        62.2
##        Respiratory failure, ventilatory    3     6.7        68.9
##               Sepsis (+/- septic shock)   14    31.1       100.0
##                                                                 
##                                      No   25    55.6        55.6
##                                     Yes   20    44.4       100.0
##                                                                 
##                                    None    7    15.6        15.6
##                                  Sepsis   38    84.4       100.0
##                                                                 
##                              Cardiology    1     2.2         2.2
##                                      ED   11    24.4        26.7
##                        General Medicine    8    17.8        44.4
##                                   Liver    6    13.3        57.8
##                               Neurology    0     0.0        57.8
##                                Oncology   10    22.2        80.0
##                                     OSH    8    17.8        97.8
##                                 Surgery    1     2.2       100.0
##                                    <NA>    0     0.0       100.0
##                                                                 
##                                      No   45   100.0       100.0
##                                                                 
##                               Unchecked   33    73.3        73.3
##                                 Checked   12    26.7       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   42    93.3        93.3
##                                 Checked    3     6.7       100.0
##                                                                 
##                               Unchecked   31    68.9        68.9
##                                 Checked   14    31.1       100.0
##                                                                 
##                               Unchecked   38    84.4        84.4
##                                 Checked    7    15.6       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   43    95.6        95.6
##                                 Checked    2     4.4       100.0
##                                                                 
##                               Unchecked   39    86.7        86.7
##                                 Checked    6    13.3       100.0
##                                                                 
##                               Unchecked   42    93.3        93.3
##                                 Checked    3     6.7       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   43    95.6        95.6
##                                 Checked    2     4.4       100.0
##                                                                 
##                               Unchecked   38    84.4        84.4
##                                 Checked    7    15.6       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                                                 
##                               Unchecked   40    88.9        88.9
##                                 Checked    5    11.1       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   18    40.0        40.0
##                                 Checked   27    60.0       100.0
##                                                                 
##                               Unchecked   32    71.1        71.1
##                                 Checked   13    28.9       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   41    91.1        91.1
##                                 Checked    4     8.9       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   39    86.7        86.7
##                                 Checked    6    13.3       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   42    93.3        93.3
##                                 Checked    3     6.7       100.0
##                                                                 
##                               Unchecked   39    86.7        86.7
##                                 Checked    6    13.3       100.0
##                                                                 
##                               Unchecked   42    93.3        93.3
##                                 Checked    3     6.7       100.0
##                                                                 
##                               Unchecked   43    95.6        95.6
##                                 Checked    2     4.4       100.0
##                                                                 
##                               Unchecked   42    93.3        93.3
##                                 Checked    3     6.7       100.0
##                                                                 
##                               Unchecked   34    75.6        75.6
##                                 Checked   11    24.4       100.0
##                                                                 
##                               Unchecked   43    95.6        95.6
##                                 Checked    2     4.4       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   41    91.1        91.1
##                                 Checked    4     8.9       100.0
##                                                                 
##                               Unchecked   38    84.4        84.4
##                                 Checked    7    15.6       100.0
##                                                                 
##                               Unchecked   43    95.6        95.6
##                                 Checked    2     4.4       100.0
##                                                                 
##                               Unchecked   43    95.6        95.6
##                                 Checked    2     4.4       100.0
##                                                                 
##                               Unchecked   42    93.3        93.3
##                                 Checked    3     6.7       100.0
##                                                                 
##                               Unchecked   39    86.7        86.7
##                                 Checked    6    13.3       100.0
##                                                                 
##                               Unchecked   43    95.6        95.6
##                                 Checked    2     4.4       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   36    80.0        80.0
##                                 Checked    9    20.0       100.0
##                                                                 
##                               Unchecked   34    75.6        75.6
##                                 Checked   11    24.4       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   33    73.3        73.3
##                                 Checked   12    26.7       100.0
##                                                                 
##                               Unchecked   10    22.2        22.2
##                                 Checked   35    77.8       100.0
##                                                                 
##                               Unchecked   42    93.3        93.3
##                                 Checked    3     6.7       100.0
##                                                                 
##                               Unchecked   13    28.9        28.9
##                                 Checked   32    71.1       100.0
##                                                                 
##                               Unchecked   18    40.0        40.0
##                                 Checked   27    60.0       100.0
##                                                                 
##                               Unchecked   38    84.4        84.4
##                                 Checked    7    15.6       100.0
##                                                                 
##                               Unchecked   43    95.6        95.6
##                                 Checked    2     4.4       100.0
##                                                                 
##                               Unchecked   43    95.6        95.6
##                                 Checked    2     4.4       100.0
##                                                                 
##                               Unchecked   44    97.8        97.8
##                                 Checked    1     2.2       100.0
##                                                                 
##                               Unchecked   40    88.9        88.9
##                                 Checked    5    11.1       100.0
##                                                                 
##                               Unchecked   45   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   42    93.3        93.3
##                                 Checked    3     6.7       100.0
##                                                                 
##                               Unchecked   38    84.4        84.4
##                                 Checked    7    15.6       100.0
##                                                                 
##                                    diet   25    55.6        55.6
##                                     npo   20    44.4       100.0
##                                                                 
## 
## p-values
##                                                    pApprox       pExact
## sex_factor                                    5.397969e-01 4.747843e-01
## race_factor                                   6.945780e-04 3.106768e-04
## thirtyday_mortality_overall                   8.236795e-33 6.658599e-39
## primary_dx_factor                             1.511175e-01 2.149766e-01
## ards_factor                                   7.506454e-04 7.679078e-04
## sepsis_factor                                 3.382206e-02 2.843758e-02
## admit_from_factor                             2.069687e-03           NA
## covid_upon_admission                                    NA           NA
## bacterial_pneumonia                           1.000000e+00 1.000000e+00
## fungal_pneumonia                              1.000000e+00 1.000000e+00
## viral_pneumonia                               8.623407e-01 1.000000e+00
## chronic_obstructive_pulmonary_disease_copd_1  2.266592e-01 1.777744e-01
## asthma_exacerbation                           8.623407e-01 1.000000e+00
## lung_lobar_collapse                                     NA           NA
## pulmonary_embolism                            7.607016e-01 6.671197e-01
## hemoptysis                                    1.000000e+00 1.000000e+00
## pancreatitis                                  1.000000e+00 5.199888e-01
## infection_genitourinary_system                2.724514e-01 2.739740e-01
## infection_intra_abdominal                     2.889823e-03 2.843277e-03
## infection_soft_tissue                         1.770362e-01 1.274538e-01
## infection_cns                                 5.964421e-01 5.530930e-01
## hepatic_failure_acute_fullminant              1.702506e-01 9.225608e-02
## hepatic_failure_acute_on_chronic              2.325481e-01 1.880308e-01
## diabetic_ketoacidosis                         3.385383e-01 1.676199e-01
## acute_leukemia                                6.729482e-01 3.061224e-01
## cerebral_vascular_accident_1                  9.758893e-01 1.000000e+00
## acute_myocardial_infarction_nstemi_stemi      8.623407e-01 1.000000e+00
## diffuse_alveolar_hemorrhage                   1.000000e+00 1.000000e+00
## decompensated_heart_failure_pulmonary_oedema  7.688210e-02 5.952058e-02
## pleural_effusion                              1.394363e-03 1.087369e-03
## interstitial_lung_disease_exacerbation        1.000000e+00 5.199888e-01
## organizing_pneumonia                                    NA           NA
## acute_eosinophilic_pneumoniae                           NA           NA
## other                                         7.452529e-01 7.944921e-01
## angioedema                                    1.000000e+00 1.000000e+00
## acute_renal_failure                           4.473887e-03 3.519391e-03
## altered_mental_status                         5.380647e-01 4.131363e-01
## hypertensive_urgency                          1.000000e+00 1.000000e+00
## hypertensive_emergency                        8.623407e-01 1.000000e+00
## endocarditis                                  5.964421e-01 5.530930e-01
## bacteremia                                    5.781839e-01 4.565879e-01
## gastrointestinal_bleeding                     8.623407e-01 1.000000e+00
## hemorrhagic_shock                             6.729482e-01 3.061224e-01
## aspiration                                    3.379067e-01 2.184143e-01
## central_line_associated_blood_steam_infection 1.000000e+00 1.000000e+00
## prosthetic_joint_infection                    1.000000e+00 1.000000e+00
## new_onset_atrial_fibrillation                 5.964421e-01 5.530930e-01
## newly_diagnosed_solid_malignancy              1.000000e+00 5.199888e-01
## myocardial_infract                            1.000000e+00 1.000000e+00
## congestive_heart_failure                      2.849789e-01 2.619492e-01
## peripheral_vascular_disease_cci               1.606189e-01 8.549594e-02
## cerebrovascular_disease                       7.688210e-02 5.952058e-02
## dementia                                      7.640837e-01 4.380668e-01
## chronic_pulmonary_disease                     3.769271e-01 3.343848e-01
## connective_tissue_disease_1                   6.899791e-01 7.240219e-01
## ulcer_disease                                 8.623407e-01 1.000000e+00
## mild_liver_disease                            1.324938e-01 7.123028e-02
## diabetes_without_complications                7.153348e-01 5.958192e-01
## diabetes_with_end_organ_damage                2.161980e-01 1.504596e-01
## hemiplegia                                    8.489765e-01 7.222971e-01
## moderate_or_severe_renal_disease              3.402887e-01 2.726467e-01
## solid_tumor_non_metastatic                    5.828321e-01 4.851836e-01
## leukemia                                      1.000000e+00 1.000000e+00
## lymhoma                                       7.607016e-01 6.671197e-01
## moderate_or_severe_liver_disease              1.522622e-01 1.107845e-01
## metastatic_solid_tumor                        4.777565e-04 4.103289e-04
## aids                                          8.623407e-01 1.000000e+00
## penicillins                                   1.819853e-01 1.701692e-01
## cephalosporins                                2.459035e-01 2.408761e-01
## carbapenems                                   9.678942e-01 7.007697e-01
## vancomycin                                    9.585958e-02 7.072966e-02
## metronidazole                                 1.736230e-02 1.218238e-02
## macrolides                                    7.235654e-01 6.485720e-01
## quinolones                                    1.000000e+00 1.000000e+00
## other_2                                       6.899791e-01 7.240219e-01
## clindamycin                                   1.000000e+00 1.000000e+00
## aminoglycosides                               1.000000e+00 1.000000e+00
## doxycycline                                   3.089141e-01 3.235917e-01
## trimethoprim_sulfamethoxazole                 6.319316e-01 5.517006e-01
## rifaximin                                     6.468617e-02 4.638206e-02
## diet                                          4.979753e-02 3.630862e-02
## 
## Standardize mean differences
##                                                   1 vs 2
## sex_factor                                    0.14273582
## race_factor                                   0.77980327
## thirtyday_mortality_overall                          NaN
## primary_dx_factor                             0.74461692
## ards_factor                                   0.63245553
## sepsis_factor                                 0.44414236
## admit_from_factor                             0.93514630
## covid_upon_admission                          0.00000000
## bacterial_pneumonia                           0.01765493
## fungal_pneumonia                              0.01827036
## viral_pneumonia                               0.20000000
## chronic_obstructive_pulmonary_disease_copd_1  0.35355339
## asthma_exacerbation                           0.20000000
## lung_lobar_collapse                           0.00000000
## pulmonary_embolism                            0.14496187
## hemoptysis                                    0.14071951
## pancreatitis                                  0.09905175
## infection_genitourinary_system                0.26245140
## infection_intra_abdominal                     0.54764630
## infection_soft_tissue                         0.27819611
## infection_cns                                 0.24618298
## hepatic_failure_acute_fullminant              0.30499714
## hepatic_failure_acute_on_chronic              0.25487862
## diabetic_ketoacidosis                         0.23319662
## acute_leukemia                                0.21320072
## cerebral_vascular_accident_1                  0.09860092
## acute_myocardial_infarction_nstemi_stemi      0.20000000
## diffuse_alveolar_hemorrhage                   0.14071951
## decompensated_heart_failure_pulmonary_oedema  0.40587919
## pleural_effusion                              0.54880431
## interstitial_lung_disease_exacerbation        0.09905175
## organizing_pneumonia                          0.00000000
## acute_eosinophilic_pneumoniae                 0.00000000
## other                                         0.10736690
## angioedema                                    0.14071951
## acute_renal_failure                           0.55470020
## altered_mental_status                         0.14543194
## hypertensive_urgency                          0.14071951
## hypertensive_emergency                        0.20000000
## endocarditis                                  0.24618298
## bacteremia                                    0.15784131
## gastrointestinal_bleeding                     0.20000000
## hemorrhagic_shock                             0.21320072
## aspiration                                    0.21600254
## central_line_associated_blood_steam_infection 0.04534608
## prosthetic_joint_infection                    0.14071951
## new_onset_atrial_fibrillation                 0.24618298
## newly_diagnosed_solid_malignancy              0.09905175
## myocardial_infract                            0.03234654
## congestive_heart_failure                      0.24193240
## peripheral_vascular_disease_cci               0.29983872
## cerebrovascular_disease                       0.40587919
## dementia                                      0.12282592
## chronic_pulmonary_disease                     0.19706586
## connective_tissue_disease_1                   0.14189020
## ulcer_disease                                 0.20000000
## mild_liver_disease                            0.30950785
## diabetes_without_complications                0.11055182
## diabetes_with_end_organ_damage                0.29944961
## hemiplegia                                    0.10485311
## moderate_or_severe_renal_disease              0.23487809
## solid_tumor_non_metastatic                    0.14485908
## leukemia                                      0.02611981
## lymhoma                                       0.14496187
## moderate_or_severe_liver_disease              0.28929992
## metastatic_solid_tumor                        0.61548280
## aids                                          0.20000000
## penicillins                                   0.27121904
## cephalosporins                                0.25000000
## carbapenems                                   0.07564762
## vancomycin                                    0.34057453
## metronidazole                                 0.46733180
## macrolides                                    0.10660414
## quinolones                                    0.06501368
## other_2                                       0.14189020
## clindamycin                                   0.01827036
## aminoglycosides                               0.01046647
## doxycycline                                   0.32108065
## trimethoprim_sulfamethoxazole                 0.14629795
## rifaximin                                     0.35713468
## diet                                          0.38253061
# Print tableone
tableone_nocovid_print <-
  print(tableone_nocovid,
    nonnormal = TRUE,
    formatOptions = list(big.mark = ",")
  )
##                                                              Stratified by thirtyday_mortality_overall
##                                                               Survivor            
##   n                                                             102               
##   age (median [IQR])                                          63.50 [50.25, 70.00]
##   sex_factor = Male (%)                                          54 ( 52.9)       
##   bmi (median [IQR])                                          26.26 [21.72, 32.29]
##   race_factor (%)                                                                 
##      African American                                            72 ( 70.6)       
##      Asian                                                        1 (  1.0)       
##      More than one race                                           2 (  2.0)       
##      White, Hispanic                                              3 (  2.9)       
##      White, non-Hispanic                                         24 ( 23.5)       
##      NA                                                           0 (  0.0)       
##   cci_total_sc (median [IQR])                                  4.00 [3.00, 6.75]  
##   thirtyday_mortality_overall = Non-Survivor (%)                  0 (  0.0)       
##   primary_dx_factor (%)                                                           
##      Acute (on chronic) liver failure                             4 (  3.9)       
##      AMI/dysrhythmia                                              2 (  2.0)       
##      CHF/cardiogenic shock                                        3 (  2.9)       
##      CNS pathology                                                2 (  2.0)       
##      GI hemorrhage                                                7 (  6.9)       
##      Metabolic                                                    3 (  2.9)       
##      Other                                                        4 (  3.9)       
##      Post-operative observation                                   3 (  2.9)       
##      Respiratory failure, AHRF                                   28 ( 27.5)       
##      Respiratory failure, airway compromise                      10 (  9.8)       
##      Respiratory failure, ventilatory                             9 (  8.8)       
##      Sepsis (+/- septic shock)                                   27 ( 26.5)       
##   ards_factor = Yes (%)                                          17 ( 16.7)       
##   sepsis_factor = Sepsis (%)                                     67 ( 65.7)       
##   admit_from_factor (%)                                                           
##      Cardiology                                                   2 (  2.0)       
##      ED                                                          51 ( 50.0)       
##      General Medicine                                            16 ( 15.7)       
##      Liver                                                        3 (  2.9)       
##      Neurology                                                    4 (  3.9)       
##      Oncology                                                     5 (  4.9)       
##      OSH                                                         12 ( 11.8)       
##      Surgery                                                      8 (  7.8)       
##      NA                                                           1 (  1.0)       
##   covid_upon_admission = No (%)                                 102 (100.0)       
##   sofa_score_total (median [IQR])                              7.00 [4.00, 11.00] 
##   ap2_total_score (median [IQR])                              23.50 [19.00, 29.00]
##   day_collected (median [IQR])                                 3.00 [2.00, 4.00]  
##   bacterial_pneumonia = Checked (%)                              28 ( 27.5)       
##   fungal_pneumonia = Checked (%)                                  2 (  2.0)       
##   viral_pneumonia = Checked (%)                                   2 (  2.0)       
##   chronic_obstructive_pulmonary_disease_copd_1 = Checked (%)      6 (  5.9)       
##   asthma_exacerbation = Checked (%)                               2 (  2.0)       
##   lung_lobar_collapse = Unchecked (%)                           102 (100.0)       
##   pulmonary_embolism = Checked (%)                                5 (  4.9)       
##   hemoptysis = Checked (%)                                        1 (  1.0)       
##   pancreatitis = Checked (%)                                      1 (  1.0)       
##   infection_genitourinary_system = Checked (%)                   15 ( 14.7)       
##   infection_intra_abdominal = Checked (%)                        10 (  9.8)       
##   infection_soft_tissue = Checked (%)                             7 (  6.9)       
##   infection_cns = Checked (%)                                     3 (  2.9)       
##   hepatic_failure_acute_fullminant = Checked (%)                  0 (  0.0)       
##   hepatic_failure_acute_on_chronic = Checked (%)                  6 (  5.9)       
##   diabetic_ketoacidosis = Checked (%)                             2 (  2.0)       
##   acute_leukemia = Checked (%)                                    0 (  0.0)       
##   cerebral_vascular_accident_1 = Checked (%)                      4 (  3.9)       
##   acute_myocardial_infarction_nstemi_stemi = Checked (%)          2 (  2.0)       
##   diffuse_alveolar_hemorrhage = Checked (%)                       1 (  1.0)       
##   decompensated_heart_failure_pulmonary_oedema = Checked (%)     17 ( 16.7)       
##   pleural_effusion = Checked (%)                                  1 (  1.0)       
##   interstitial_lung_disease_exacerbation = Checked (%)            1 (  1.0)       
##   organizing_pneumonia = Unchecked (%)                          102 (100.0)       
##   acute_eosinophilic_pneumoniae = Unchecked (%)                 102 (100.0)       
##   other = Checked (%)                                            15 ( 14.7)       
##   angioedema = Checked (%)                                        1 (  1.0)       
##   acute_renal_failure = Checked (%)                              34 ( 33.3)       
##   altered_mental_status = Checked (%)                            23 ( 22.5)       
##   hypertensive_urgency = Checked (%)                              1 (  1.0)       
##   hypertensive_emergency = Checked (%)                            2 (  2.0)       
##   endocarditis = Checked (%)                                      3 (  2.9)       
##   bacteremia = Checked (%)                                        5 (  4.9)       
##   gastrointestinal_bleeding = Checked (%)                         2 (  2.0)       
##   hemorrhagic_shock = Checked (%)                                 0 (  0.0)       
##   aspiration = Checked (%)                                        7 (  6.9)       
##   central_line_associated_blood_steam_infection = Checked (%)     3 (  2.9)       
##   prosthetic_joint_infection = Checked (%)                        1 (  1.0)       
##   new_onset_atrial_fibrillation = Checked (%)                     3 (  2.9)       
##   newly_diagnosed_solid_malignancy = Checked (%)                  1 (  1.0)       
##   myocardial_infract = Checked (%)                                6 (  5.9)       
##   congestive_heart_failure = Checked (%)                         23 ( 22.5)       
##   peripheral_vascular_disease_cci = Checked (%)                   1 (  1.0)       
##   cerebrovascular_disease = Checked (%)                          17 ( 16.7)       
##   dementia = Checked (%)                                          4 (  3.9)       
##   chronic_pulmonary_disease = Checked (%)                        34 ( 33.3)       
##   connective_tissue_disease_1 = Checked (%)                       8 (  7.8)       
##   ulcer_disease = Checked (%)                                     2 (  2.0)       
##   mild_liver_disease = Checked (%)                                2 (  2.0)       
##   diabetes_without_complications = Checked (%)                   12 ( 11.8)       
##   diabetes_with_end_organ_damage = Checked (%)                   13 ( 12.7)       
##   hemiplegia = Checked (%)                                        7 (  6.9)       
##   moderate_or_severe_renal_disease = Checked (%)                 14 ( 13.7)       
##   solid_tumor_non_metastatic = Checked (%)                       19 ( 18.6)       
##   leukemia = Checked (%)                                          4 (  3.9)       
##   lymhoma = Checked (%)                                           5 (  4.9)       
##   moderate_or_severe_liver_disease = Checked (%)                 10 (  9.8)       
##   metastatic_solid_tumor = Checked (%)                            4 (  3.9)       
##   aids = Checked (%)                                              2 (  2.0)       
##   penicillins = Checked (%)                                      16 ( 15.7)       
##   cephalosporins = Checked (%)                                   68 ( 66.7)       
##   carbapenems = Checked (%)                                       5 (  4.9)       
##   vancomycin = Checked (%)                                       56 ( 54.9)       
##   metronidazole = Checked (%)                                    38 ( 37.3)       
##   macrolides = Checked (%)                                       20 ( 19.6)       
##   quinolones = Checked (%)                                        6 (  5.9)       
##   other_2 = Checked (%)                                           8 (  7.8)       
##   clindamycin = Checked (%)                                       2 (  2.0)       
##   aminoglycosides = Checked (%)                                  11 ( 10.8)       
##   doxycycline = Checked (%)                                       5 (  4.9)       
##   trimethoprim_sulfamethoxazole = Checked (%)                    11 ( 10.8)       
##   rifaximin = Checked (%)                                         5 (  4.9)       
##   diet = npo (%)                                                 27 ( 26.5)       
##   d_sofa_admission (median [IQR])                              1.00 [0.00, 3.00]  
##   d_sofa_stool (median [IQR])                                  1.00 [0.00, 2.00]  
##                                                              Stratified by thirtyday_mortality_overall
##                                                               Non-Survivor        
##   n                                                              45               
##   age (median [IQR])                                          64.00 [54.00, 69.00]
##   sex_factor = Male (%)                                          27 ( 60.0)       
##   bmi (median [IQR])                                          24.10 [21.09, 28.06]
##   race_factor (%)                                                                 
##      African American                                            21 ( 46.7)       
##      Asian                                                        0 (  0.0)       
##      More than one race                                           3 (  6.7)       
##      White, Hispanic                                              0 (  0.0)       
##      White, non-Hispanic                                         15 ( 33.3)       
##      NA                                                           6 ( 13.3)       
##   cci_total_sc (median [IQR])                                  5.00 [4.00, 7.00]  
##   thirtyday_mortality_overall = Non-Survivor (%)                 45 (100.0)       
##   primary_dx_factor (%)                                                           
##      Acute (on chronic) liver failure                             9 ( 20.0)       
##      AMI/dysrhythmia                                              0 (  0.0)       
##      CHF/cardiogenic shock                                        0 (  0.0)       
##      CNS pathology                                                0 (  0.0)       
##      GI hemorrhage                                                2 (  4.4)       
##      Metabolic                                                    1 (  2.2)       
##      Other                                                        1 (  2.2)       
##      Post-operative observation                                   1 (  2.2)       
##      Respiratory failure, AHRF                                   13 ( 28.9)       
##      Respiratory failure, airway compromise                       1 (  2.2)       
##      Respiratory failure, ventilatory                             3 (  6.7)       
##      Sepsis (+/- septic shock)                                   14 ( 31.1)       
##   ards_factor = Yes (%)                                          20 ( 44.4)       
##   sepsis_factor = Sepsis (%)                                     38 ( 84.4)       
##   admit_from_factor (%)                                                           
##      Cardiology                                                   1 (  2.2)       
##      ED                                                          11 ( 24.4)       
##      General Medicine                                             8 ( 17.8)       
##      Liver                                                        6 ( 13.3)       
##      Neurology                                                    0 (  0.0)       
##      Oncology                                                    10 ( 22.2)       
##      OSH                                                          8 ( 17.8)       
##      Surgery                                                      1 (  2.2)       
##      NA                                                           0 (  0.0)       
##   covid_upon_admission = No (%)                                  45 (100.0)       
##   sofa_score_total (median [IQR])                             10.00 [6.00, 14.00] 
##   ap2_total_score (median [IQR])                              27.00 [23.00, 32.00]
##   day_collected (median [IQR])                                 2.00 [2.00, 5.00]  
##   bacterial_pneumonia = Checked (%)                              12 ( 26.7)       
##   fungal_pneumonia = Checked (%)                                  1 (  2.2)       
##   viral_pneumonia = Checked (%)                                   0 (  0.0)       
##   chronic_obstructive_pulmonary_disease_copd_1 = Checked (%)      0 (  0.0)       
##   asthma_exacerbation = Checked (%)                               0 (  0.0)       
##   lung_lobar_collapse = Unchecked (%)                            45 (100.0)       
##   pulmonary_embolism = Checked (%)                                1 (  2.2)       
##   hemoptysis = Checked (%)                                        0 (  0.0)       
##   pancreatitis = Checked (%)                                      1 (  2.2)       
##   infection_genitourinary_system = Checked (%)                    3 (  6.7)       
##   infection_intra_abdominal = Checked (%)                        14 ( 31.1)       
##   infection_soft_tissue = Checked (%)                             7 ( 15.6)       
##   infection_cns = Checked (%)                                     0 (  0.0)       
##   hepatic_failure_acute_fullminant = Checked (%)                  2 (  4.4)       
##   hepatic_failure_acute_on_chronic = Checked (%)                  6 ( 13.3)       
##   diabetic_ketoacidosis = Checked (%)                             3 (  6.7)       
##   acute_leukemia = Checked (%)                                    1 (  2.2)       
##   cerebral_vascular_accident_1 = Checked (%)                      1 (  2.2)       
##   acute_myocardial_infarction_nstemi_stemi = Checked (%)          0 (  0.0)       
##   diffuse_alveolar_hemorrhage = Checked (%)                       0 (  0.0)       
##   decompensated_heart_failure_pulmonary_oedema = Checked (%)      2 (  4.4)       
##   pleural_effusion = Checked (%)                                  7 ( 15.6)       
##   interstitial_lung_disease_exacerbation = Checked (%)            1 (  2.2)       
##   organizing_pneumonia = Unchecked (%)                           45 (100.0)       
##   acute_eosinophilic_pneumoniae = Unchecked (%)                  45 (100.0)       
##   other = Checked (%)                                             5 ( 11.1)       
##   angioedema = Checked (%)                                        0 (  0.0)       
##   acute_renal_failure = Checked (%)                              27 ( 60.0)       
##   altered_mental_status = Checked (%)                            13 ( 28.9)       
##   hypertensive_urgency = Checked (%)                              0 (  0.0)       
##   hypertensive_emergency = Checked (%)                            0 (  0.0)       
##   endocarditis = Checked (%)                                      0 (  0.0)       
##   bacteremia = Checked (%)                                        4 (  8.9)       
##   gastrointestinal_bleeding = Checked (%)                         0 (  0.0)       
##   hemorrhagic_shock = Checked (%)                                 1 (  2.2)       
##   aspiration = Checked (%)                                        6 ( 13.3)       
##   central_line_associated_blood_steam_infection = Checked (%)     1 (  2.2)       
##   prosthetic_joint_infection = Checked (%)                        0 (  0.0)       
##   new_onset_atrial_fibrillation = Checked (%)                     0 (  0.0)       
##   newly_diagnosed_solid_malignancy = Checked (%)                  1 (  2.2)       
##   myocardial_infract = Checked (%)                                3 (  6.7)       
##   congestive_heart_failure = Checked (%)                          6 ( 13.3)       
##   peripheral_vascular_disease_cci = Checked (%)                   3 (  6.7)       
##   cerebrovascular_disease = Checked (%)                           2 (  4.4)       
##   dementia = Checked (%)                                          3 (  6.7)       
##   chronic_pulmonary_disease = Checked (%)                        11 ( 24.4)       
##   connective_tissue_disease_1 = Checked (%)                       2 (  4.4)       
##   ulcer_disease = Checked (%)                                     0 (  0.0)       
##   mild_liver_disease = Checked (%)                                4 (  8.9)       
##   diabetes_without_complications = Checked (%)                    7 ( 15.6)       
##   diabetes_with_end_organ_damage = Checked (%)                    2 (  4.4)       
##   hemiplegia = Checked (%)                                        2 (  4.4)       
##   moderate_or_severe_renal_disease = Checked (%)                  3 (  6.7)       
##   solid_tumor_non_metastatic = Checked (%)                        6 ( 13.3)       
##   leukemia = Checked (%)                                          2 (  4.4)       
##   lymhoma = Checked (%)                                           1 (  2.2)       
##   moderate_or_severe_liver_disease = Checked (%)                  9 ( 20.0)       
##   metastatic_solid_tumor = Checked (%)                           11 ( 24.4)       
##   aids = Checked (%)                                              0 (  0.0)       
##   penicillins = Checked (%)                                      12 ( 26.7)       
##   cephalosporins = Checked (%)                                   35 ( 77.8)       
##   carbapenems = Checked (%)                                       3 (  6.7)       
##   vancomycin = Checked (%)                                       32 ( 71.1)       
##   metronidazole = Checked (%)                                    27 ( 60.0)       
##   macrolides = Checked (%)                                        7 ( 15.6)       
##   quinolones = Checked (%)                                        2 (  4.4)       
##   other_2 = Checked (%)                                           2 (  4.4)       
##   clindamycin = Checked (%)                                       1 (  2.2)       
##   aminoglycosides = Checked (%)                                   5 ( 11.1)       
##   doxycycline = Checked (%)                                       0 (  0.0)       
##   trimethoprim_sulfamethoxazole = Checked (%)                     3 (  6.7)       
##   rifaximin = Checked (%)                                         7 ( 15.6)       
##   diet = npo (%)                                                 20 ( 44.4)       
##   d_sofa_admission (median [IQR])                              0.00 [-2.00, 2.00] 
##   d_sofa_stool (median [IQR])                                  0.00 [-2.00, 2.00] 
##                                                              Stratified by thirtyday_mortality_overall
##                                                               p      test   
##   n                                                                         
##   age (median [IQR])                                           0.756 nonnorm
##   sex_factor = Male (%)                                        0.540        
##   bmi (median [IQR])                                           0.325 nonnorm
##   race_factor (%)                                              0.001        
##      African American                                                       
##      Asian                                                                  
##      More than one race                                                     
##      White, Hispanic                                                        
##      White, non-Hispanic                                                    
##      NA                                                                     
##   cci_total_sc (median [IQR])                                  0.183 nonnorm
##   thirtyday_mortality_overall = Non-Survivor (%)              <0.001        
##   primary_dx_factor (%)                                        0.151        
##      Acute (on chronic) liver failure                                       
##      AMI/dysrhythmia                                                        
##      CHF/cardiogenic shock                                                  
##      CNS pathology                                                          
##      GI hemorrhage                                                          
##      Metabolic                                                              
##      Other                                                                  
##      Post-operative observation                                             
##      Respiratory failure, AHRF                                              
##      Respiratory failure, airway compromise                                 
##      Respiratory failure, ventilatory                                       
##      Sepsis (+/- septic shock)                                              
##   ards_factor = Yes (%)                                        0.001        
##   sepsis_factor = Sepsis (%)                                   0.034        
##   admit_from_factor (%)                                        0.002        
##      Cardiology                                                             
##      ED                                                                     
##      General Medicine                                                       
##      Liver                                                                  
##      Neurology                                                              
##      Oncology                                                               
##      OSH                                                                    
##      Surgery                                                                
##      NA                                                                     
##   covid_upon_admission = No (%)                                   NA        
##   sofa_score_total (median [IQR])                              0.001 nonnorm
##   ap2_total_score (median [IQR])                               0.008 nonnorm
##   day_collected (median [IQR])                                 0.777 nonnorm
##   bacterial_pneumonia = Checked (%)                            1.000        
##   fungal_pneumonia = Checked (%)                               1.000        
##   viral_pneumonia = Checked (%)                                0.862        
##   chronic_obstructive_pulmonary_disease_copd_1 = Checked (%)   0.227        
##   asthma_exacerbation = Checked (%)                            0.862        
##   lung_lobar_collapse = Unchecked (%)                             NA        
##   pulmonary_embolism = Checked (%)                             0.761        
##   hemoptysis = Checked (%)                                     1.000        
##   pancreatitis = Checked (%)                                   1.000        
##   infection_genitourinary_system = Checked (%)                 0.272        
##   infection_intra_abdominal = Checked (%)                      0.003        
##   infection_soft_tissue = Checked (%)                          0.177        
##   infection_cns = Checked (%)                                  0.596        
##   hepatic_failure_acute_fullminant = Checked (%)               0.170        
##   hepatic_failure_acute_on_chronic = Checked (%)               0.233        
##   diabetic_ketoacidosis = Checked (%)                          0.339        
##   acute_leukemia = Checked (%)                                 0.673        
##   cerebral_vascular_accident_1 = Checked (%)                   0.976        
##   acute_myocardial_infarction_nstemi_stemi = Checked (%)       0.862        
##   diffuse_alveolar_hemorrhage = Checked (%)                    1.000        
##   decompensated_heart_failure_pulmonary_oedema = Checked (%)   0.077        
##   pleural_effusion = Checked (%)                               0.001        
##   interstitial_lung_disease_exacerbation = Checked (%)         1.000        
##   organizing_pneumonia = Unchecked (%)                            NA        
##   acute_eosinophilic_pneumoniae = Unchecked (%)                   NA        
##   other = Checked (%)                                          0.745        
##   angioedema = Checked (%)                                     1.000        
##   acute_renal_failure = Checked (%)                            0.004        
##   altered_mental_status = Checked (%)                          0.538        
##   hypertensive_urgency = Checked (%)                           1.000        
##   hypertensive_emergency = Checked (%)                         0.862        
##   endocarditis = Checked (%)                                   0.596        
##   bacteremia = Checked (%)                                     0.578        
##   gastrointestinal_bleeding = Checked (%)                      0.862        
##   hemorrhagic_shock = Checked (%)                              0.673        
##   aspiration = Checked (%)                                     0.338        
##   central_line_associated_blood_steam_infection = Checked (%)  1.000        
##   prosthetic_joint_infection = Checked (%)                     1.000        
##   new_onset_atrial_fibrillation = Checked (%)                  0.596        
##   newly_diagnosed_solid_malignancy = Checked (%)               1.000        
##   myocardial_infract = Checked (%)                             1.000        
##   congestive_heart_failure = Checked (%)                       0.285        
##   peripheral_vascular_disease_cci = Checked (%)                0.161        
##   cerebrovascular_disease = Checked (%)                        0.077        
##   dementia = Checked (%)                                       0.764        
##   chronic_pulmonary_disease = Checked (%)                      0.377        
##   connective_tissue_disease_1 = Checked (%)                    0.690        
##   ulcer_disease = Checked (%)                                  0.862        
##   mild_liver_disease = Checked (%)                             0.132        
##   diabetes_without_complications = Checked (%)                 0.715        
##   diabetes_with_end_organ_damage = Checked (%)                 0.216        
##   hemiplegia = Checked (%)                                     0.849        
##   moderate_or_severe_renal_disease = Checked (%)               0.340        
##   solid_tumor_non_metastatic = Checked (%)                     0.583        
##   leukemia = Checked (%)                                       1.000        
##   lymhoma = Checked (%)                                        0.761        
##   moderate_or_severe_liver_disease = Checked (%)               0.152        
##   metastatic_solid_tumor = Checked (%)                        <0.001        
##   aids = Checked (%)                                           0.862        
##   penicillins = Checked (%)                                    0.182        
##   cephalosporins = Checked (%)                                 0.246        
##   carbapenems = Checked (%)                                    0.968        
##   vancomycin = Checked (%)                                     0.096        
##   metronidazole = Checked (%)                                  0.017        
##   macrolides = Checked (%)                                     0.724        
##   quinolones = Checked (%)                                     1.000        
##   other_2 = Checked (%)                                        0.690        
##   clindamycin = Checked (%)                                    1.000        
##   aminoglycosides = Checked (%)                                1.000        
##   doxycycline = Checked (%)                                    0.309        
##   trimethoprim_sulfamethoxazole = Checked (%)                  0.632        
##   rifaximin = Checked (%)                                      0.065        
##   diet = npo (%)                                               0.050        
##   d_sofa_admission (median [IQR])                              0.033 nonnorm
##   d_sofa_stool (median [IQR])                                  0.016 nonnorm
# Export to csv to then load in as a dataframe
write.csv(
  tableone_nocovid_print,
  "./Results/Table_One_30_Days_Mortality_train.csv",
  row.names = TRUE
)

# Clean table for paper
tableone_nocovid_print_clean <-
tableone_nocovid_print %>%
  as.data.frame() %>% 
  rownames_to_column(var = "variable") %>% 
  filter(variable != "thirtyday_mortality_overall...Non.Survivor....") %>% #distinct(variable)
  mutate(
    variable = dplyr::recode(
      variable,
      n = "Number of Patients",
      `age..median..IQR..` = "Age (median [IQR])",
      `sex_factor...Male....` = "Male (%)",
      `bmi..median..IQR..` = "Body Mass Index (median [IQR])",
      `race_factor....` = "Race (%)",
      `X...African.American` = " African American",
      `X...Asian` = "Asian",
      `X...More.than.one.race` = "More than one race",
      `X...White..Hispanic` = "White, Hispanic",
      `X...White..non.Hispanic` = "White, Non-Hispanic",
      `X...NA` = "NA",
      `cci_total_sc..median..IQR..` = "Charlson Comorbidity Index (median [IQR])",
      `primary_dx_factor....` = "Primary admission diagnosis (%)",
      `X...Acute..on.chronic..liver.failure` = "Acute chronic liver failure",
      `X...AMI.dysrhythmia` = "AMI dysrhytmia",
      `X...CHF.cardiogenic.shock` = "CHF cardiogenic shock",
      `X...CNS.pathology` = "CNS pathology",
      `X...GI.hemorrhage` = "GI hemorrhage",
      `X...Metabolic` = "Metabolic",
      `X...Other` = "Other Primary diagnosis",
      `X...Post.operative.observation` = "Post-operative observation",
      `X...Respiratory.failure..AHRF` = "Respiratory failure (AHRF)",
      `X...Respiratory.failure..airway.compromise` = "Respiratory failure, airway compromise",
      `X...Respiratory.failure..ventilatory` = "Respiratory failure, ventilatory",
      `X...Sepsis......septic.shock.` = "Sepsis, septic shock",
      `ards_factor...Yes....` = "Acute respiratory distress syndrome (%)",
      `sepsis_factor...Sepsis....` = "Sepsis (%)",
      `admit_from_factor....` = "Admitted from (%)",
      `X...Cardiology` = "Cardiology",
      `X...ED` = "Emergency Department",
      `X...General.Medicine` = "General Medicine",
      `X...Liver` = "Liver",
      `X...Neurology` = "Nuerology",
      `X...Oncology` = "Oncology",
      `X...OSH` = "Outside Hospital",
      `X...Surgery` = "Surgery",
      `X...NA.1` = "Unknown",
      `covid_upon_admission...No....` = "No Covid upon admission (%)",
      `sofa_score_total..median..IQR..` = "SOFA Score (median [IQR])",
      `ap2_total_score..median..IQR..` = "APACHE II Score (median [IQR])",
      `day_collected..median..IQR..` = "Day From Admission Stool Sample Collected (median [IQR])",
      `bacterial_pneumonia...Checked....` = "Bacterial Pneumonia (%)",
      `fungal_pneumonia...Checked....` = "Fungal Pneumonia (%)",
      `viral_pneumonia...Checked....` = "Viral Pneumonia (%)",
      `chronic_obstructive_pulmonary_disease_copd_1...Checked....` = "Chronic Obstructive Pulmonary Disease (COPD) (%)",
      `asthma_exacerbation...Checked....` = "Asthma exacerbation (%)",
      `lung_lobar_collapse...Unchecked....` = "Lung/lobar collapse (%)",
      `pulmonary_embolism...Checked....` = "Pulmonary embolism (%)",
      `hemoptysis...Checked....` = "Hemoptysis (%)",
      `pancreatitis...Checked....` = "Pancreatitis (%)",
      `infection_genitourinary_system...Checked....` = "Infection, genitourinary system (%)",
      `infection_intra_abdominal...Checked....` = "Infection, Intra-abdominal (%)",
      `infection_soft_tissue...Checked....` = "Infection, soft tissue (%)",
      `infection_cns...Checked....` = "Infection, CNS (%)",
      `hepatic_failure_acute_fullminant...Checked....` = "Hepatic failure, acute fullminant (%)",
      `hepatic_failure_acute_on_chronic...Checked....` = "Hepatic failure, acute on chronic (%)",
      `diabetic_ketoacidosis...Checked....` = "Diabetic ketoacidosis (%)",
      `acute_leukemia...Checked....` = "Acute leukemia (%)",
      `cerebral_vascular_accident_1...Checked....` = "Cerebreal vascular accident (%)",
      `acute_myocardial_infarction_nstemi_stemi...Checked....` = "Acute myocardial infarction (NSTEMI/STEMI) (%)",
      `diffuse_alveolar_hemorrhage...Checked....` = "Diffuse alveolar hemorrhage (%)",
      `decompensated_heart_failure_pulmonary_oedema...Checked....` = "Decompensated heart failure/Pulmonary oedema (%)",
      `pleural_effusion...Checked....` = "Pleural effusion (%)",
      `interstitial_lung_disease_exacerbation...Checked....` = "Interstitial lung disease exacerbation (%)",
      `organizing_pneumonia...Unchecked....` = "Organizing pneumonia (%)",
      `acute_eosinophilic_pneumoniae...Unchecked....` = "Acute eosinophilic pneumoniae (%)",
      `other...Checked....` = "Other (%)",
      `angioedema...Checked....` = "Angioedema (%)",
      `acute_renal_failure...Checked....` = "Acute renal failure (%)",
      `altered_mental_status...Checked....` = "Altered mental status (%)",
      `hypertensive_urgency...Checked....` = "Hypertensive urgency (%)",
      `hypertensive_emergency...Checked....` = "Hypertensive emergency (%)",
      `endocarditis...Checked....` = "Endocarditis (%)",
      `bacteremia...Checked....` = "Bacteremia (%)",
      `gastrointestinal_bleeding...Checked....` = "Gastrointestinal bleeding (%)",
      `hemorrhagic_shock...Checked....` = "Hemorrhagic shock (%)",
      `aspiration...Checked....` = "Aspiration (%)",
      `central_line_associated_blood_steam_infection...Checked....` = "Central line associated blood steam infection (%)",
      `prosthetic_joint_infection...Checked....` = "Prosthetic joint infection (%)",
      `new_onset_atrial_fibrillation...Checked....` = "New onset atrial fibrillation (%)",
      `newly_diagnosed_solid_malignancy...Checked....` = "Newly diagnosed solid malignancy (%)",
      `myocardial_infract...Checked....` = "Myocardial infract (%)",
      `congestive_heart_failure...Checked....` = "Congestive heart failure (%)",
      `peripheral_vascular_disease_cci...Checked....` = "Peripheral vascular disease (%)",
      `cerebrovascular_disease...Checked....` = "Cerebrovascular disease (%)",
      `dementia...Checked....` = "Dementia (%)",
      `chronic_pulmonary_disease...Checked....` = "Chronic pulmonary disease (%)",
      `connective_tissue_disease_1...Checked....` = "Connective tissue disease (%)",
      `ulcer_disease...Checked....` = "Ulcer disease (%)",
      `mild_liver_disease...Checked....` = "Mild liver disease (%)",
      `diabetes_without_complications...Checked....` = "Diabetes (without complications) (%)",
      `diabetes_with_end_organ_damage...Checked....` = "Diabetes (with end organ damage) (%)",
      `hemiplegia...Checked....` = "Hemiplegia (%)",
      `moderate_or_severe_renal_disease...Checked....` = "Moderate or severe renal disease (%)",
      `solid_tumor_non_metastatic...Checked....` = "Solid tumor (non-metastatic) (%)",
      `leukemia...Checked....` = "Leukemia (%)",
      `lymhoma...Checked....` = "Lymphoma (%)",
      `moderate_or_severe_liver_disease...Checked....` = "Moderate or severe liver disease (%)",
      `metastatic_solid_tumor...Checked....` = "Solid tumor (metastatic) (%)",
      `aids...Checked....` = "AIDS (%)",
      `penicillins...Checked....` = "Penicillins (%)",
      `cephalosporins...Checked....` = "Cephalosporins (%)",
      `carbapenems...Checked....` = "Carbapenems (%)",
      `vancomycin...Checked....` = "Vancomycin (%)",
      `metronidazole...Checked....` = "Metronidazole (%)",
      `macrolides...Checked....` = "Macrolides (%)",
      `quinolones...Checked....` = "Quinolones (%)",
      `other_2...Checked....` = "Other Antiobiotics (%)",
      `clindamycin...Checked....` = "Clindamycin (%)",
      `aminoglycosides...Checked....` = "Aminoglycosides (%)",
      `doxycycline...Checked....` = "Doxycycline (%)",
      `trimethoprim_sulfamethoxazole...Checked....` = "Trimethoprim-Sulfamethoxazole (%)",
      `rifaximin...Checked....` = "Rifaximin (%)",
      `diet...npo....` = "Diet (nothing by mouth) (%)",
      `d_sofa_admission..median..IQR..` = "SOFA from admission (median [IQR])",
      `d_sofa_stool..median..IQR..` = "SOFA from Stool Sample (median [IQR])"
    )
  ) %>% 
  column_to_rownames(var = "variable")

# Export to csv to then load in as a dataframe
write.csv(
  tableone_nocovid_print_clean,
  "./Results/Table_One_30_Days_Mortality_train_clean.csv",
  row.names = TRUE
)

# Import csv as dataframe
tableone_nocovid_csv <-
  read.csv("./Results/Table_One_30_Days_Mortality_train.csv",
    stringsAsFactors = FALSE
  )

# Filter for only p-values <= 0.3 to then include in multi-variable model
tableone_pval_filt <- tableone_nocovid_csv %>%
  dplyr::rename(variable = X) %>%
  mutate(
    p = ifelse(p == "<0.001", 0.001, p),
    p = as.numeric(p)
  ) %>%
  # dplyr::slice(2:5, 11, 26, 27, 38:40, 100:112) %>%
  filter(!grepl(variable, pattern = "^\\s")) %>% 
  janitor::clean_names()

tableone_pval_filt_vars <- tableone_pval_filt %>%
  filter(variable != "n") %>%
  select(variable) %>%
  mutate(
    variable = as.character(variable),
    variable = gsub(
      x = variable,
      pattern = "\\s\\(median \\[IQR\\]\\)|\\s\\(%\\)| = Yes| = [Cc]hecked| = Male| = [Uu]nchecked| = npo| = Sepsis| = None",
      fixed = FALSE,
      replacement = ""
    )
  ) %>%
  filter(variable %!in% c("thirtyday_mortality_overall = Non-Survivor", "covid_upon_admission = No")) %>%
  pull(variable)

tableone_nocovid_df_filt <-
  tableone_nocovid_df[, tableone_pval_filt_vars]

tableone_nocovid_df_filt <- tableone_nocovid_df_filt %>%
  bind_cols(
    micu_new_nocovid_oc %>% ungroup() %>%
      left_join(cri_rxmar_abx_long, by = "unique_id") %>%
      mutate(across(
        Cephalosporins:Quinolones, ~ replace_na(., "unchecked")
      )) %>%
      mutate(across(
        Cephalosporins:Quinolones, ~ as.factor(.)
      )) %>%
      select(unique_id, thirtyday_mortality_overall)
  ) %>%
  relocate(unique_id, .before = NULL) %>%
  mutate_all(as.character) %>% 
  pivot_longer(
    !c(unique_id:day_collected, thirtyday_mortality_overall),
    names_to = "variable",
    values_to = "value"
  ) %>%
  mutate(
    value = as.character(value),
    value = ifelse(value %in% c("Checked", "checked", "diet"), 1, 0)
  ) %>% # diet = 1, npo = 0
  pivot_wider(names_from = "variable", values_from = "value") %>% 
  mutate(age = as.numeric(age),
         bmi = as.numeric(bmi),
         cci_total_sc = as.numeric(cci_total_sc),
         sofa_score_total = as.numeric(sofa_score_total),
         ap2_total_score = as.numeric(ap2_total_score),
         day_collected = as.numeric(day_collected)) %>% 
  mutate_if(is.character, as.factor)

Univariate -Omics Statistics: Shannon Diversity, Enterococcus/Enterobacterales Relative Abundance

t_metaphlan_micu_nocovid <- metaphlan %>%
  mutate(taxid = as.character(taxid)) %>%
  ungroup() %>%
  right_join(
    micu_new_nocovid_oc %>% # contains both MICU and HD information
      ungroup() %>%
      select(db, ID, shotgunSeq_id, metabolomicsID, sepsis.factor) %>%
      distinct(shotgunSeq_id, .keep_all = TRUE),
    by = "shotgunSeq_id"
  ) %>%
  ungroup() %>%
  select(shotgunSeq_id, metabolomicsID, taxid, db, pctseqs, Total) %>%
  distinct() %>%
  ungroup() %>%
  mutate(pctseqs = as.numeric(pctseqs)) %>%
  filter(pctseqs >= 0.0001) %>%
  group_by(shotgunSeq_id) %>%
  dplyr::add_count(taxid, name = "totalSp") %>%
  mutate(
    seq_id_count = length(unique(shotgunSeq_id)),
    spPres = totalSp / seq_id_count
  ) %>%
  filter(spPres >= 0.10) %>%
  select(-c(Total, seq_id_count, spPres, totalSp)) %>%
  group_by(shotgunSeq_id) %>%
  mutate(pctseqs = pctseqs / sum(pctseqs))

t_metaphlan_micu_nocovid_mat <- t_metaphlan_micu_nocovid %>%
  distinct() %>%
  left_join(metaphlan %>% select(taxid, Species) %>% mutate(taxid = as.character(taxid)),
    relationship = "many-to-many"
  ) %>%
  pivot_wider(
    id_cols = c(shotgunSeq_id),
    names_from = "Species",
    values_from = "pctseqs",
    values_fill = 0,
    values_fn = sum
  ) %>%
  column_to_rownames(var = "shotgunSeq_id")

micu_nocovid_first_samps_omics <- micu_new_nocovid_oc %>%
  left_join(t_metaphlan_micu_nocovid, relationship = "many-to-many") %>%
  left_join(metab_quant_imp_tot_mM, relationship = "many-to-many")

micu_nocovid_first_samps_omics_light <-
  micu_nocovid_first_samps_omics %>%
  select(
    unique_id,
    shotgunSeq_id,
    metabolomicsID,
    taxid,
    pctseqs,
    compound,
    mvalue__mM,
    thirtyday_mortality_overall
  ) %>%
  pivot_wider(
    id_cols = c(
      unique_id,
      shotgunSeq_id,
      metabolomicsID,
      compound,
      mvalue__mM,
      thirtyday_mortality_overall
    ),
    names_from = "taxid",
    values_from = "pctseqs",
    values_fill = 0
  ) %>%
  pivot_longer(
    !c(
      unique_id,
      shotgunSeq_id,
      metabolomicsID,
      compound,
      mvalue__mM,
      thirtyday_mortality_overall
    ),
    names_to = "taxid",
    values_to = "pctseqs"
  )

# Create dataframe for all phylogentic levels of interest
phylo_rel_abd <- t_metaphlan_micu_nocovid %>%
  pivot_wider(
    id_cols = c(shotgunSeq_id, metabolomicsID, db),
    names_from = "taxid",
    values_from = "pctseqs",
    values_fill = 0
  ) %>%
  pivot_longer(!c(shotgunSeq_id, metabolomicsID, db),
    names_to = "taxid",
    values_to = "pctseqs"
  ) %>%
  left_join(
    micu_new_nocovid_oc %>%
      ungroup() %>%
      select(shotgunSeq_id, thirtyday_mortality_overall) %>%
      distinct(shotgunSeq_id,
        .keep_all = TRUE,
        by = "shotgunSeq_id"
      )
  ) %>%
  left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
  mutate(Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "|")) %>% 
  filter(grepl(pattern = "Enterococcus|Enterobacterales", x = Species)) %>%
  mutate(organism = case_when(
    grepl(pattern = "Enterococcus", x = Species) ~ "Enterococcus",
    grepl(pattern = "Enterobacterales", x = Species) ~ "Enterobacterales"
  )) %>%
  drop_na(organism) %>%
  select(shotgunSeq_id, thirtyday_mortality_overall, organism, pctseqs) %>%
  group_by(shotgunSeq_id, thirtyday_mortality_overall, organism) %>%
  summarise(pctseqs = sum(pctseqs)) %>%
  ungroup() %>%
  pivot_wider(
    names_from = "organism",
    values_from = "pctseqs",
    values_fill = 0
  ) %>%
  pivot_longer(
    !c(shotgunSeq_id, thirtyday_mortality_overall),
    names_to = "organism",
    values_to = "pctseqs"
  ) %>%
  mutate(thirtyday_mortality_overall = factor(
    thirtyday_mortality_overall,
    levels = c("Survivor", "Non-Survivor")
  ))

# Obtain stats for all phylogentic levels of interest
rel_abd_alpha_stats <- phylo_rel_abd %>%
  group_by(organism) %>%
  rstatix::wilcox_test(pctseqs ~ thirtyday_mortality_overall) %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  mutate(p.adj = ifelse(p.adj < 0.001, 0.001, round(p.adj, 3)))

symnum.args <-
  list(
    cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
    symbols = c("****", "***", "**", "*", "ns")
  )

# Alpha Diversity matrix: Shannon
alpha_shannon <-
  vegan::diversity(t_metaphlan_micu_nocovid_mat, index = "shannon") %>%
  as.data.frame() %>%
  rownames_to_column(var = "shotgunSeq_id") %>%
  dplyr::rename("Shannon" = ".")

# Write out shotgunSeq_id list
alpha_shannon %>%
  select(shotgunSeq_id) %>%
  write.csv(., "./Data/shotgunSeq_id_list.csv", row.names = FALSE)

# Enterococcus
set.seed(456)
gg_ecoc_rel_abd <- phylo_rel_abd %>%
  filter(organism == "Enterococcus") %>%
  group_by(organism) %>%
  ggplot(
    .,
    aes(
      x = thirtyday_mortality_overall,
      y = pctseqs,
      color = thirtyday_mortality_overall,
      fill = thirtyday_mortality_overall
    )
  ) +
  geom_boxplot(
    outlier.colour = NA,
    alpha = 0.35
  ) +
  geom_jitter(
    width = 0.2,
    size = 2.5,
    alpha = 0.65
  ) +
  stat_compare_means(
    comparisons = list(c("Survivor", "Non-Survivor")),
    tip.length = 0.01,
    symnum.args = symnum.args,
    method.args = list(
      alternative = "two.sided",
      exact = FALSE
    ),
    label.y = c(1.05)
  ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(
      # Top margin
      t = 5,
      # Right margin
      r = 5,
      # Bottom margin
      b = 5,
      # Left margin
      l = 5
      ),
    panel.border = eb(),
    axis.line = el(color = "black")
  ) +
  ylab(~ atop(paste(italic("Enterococcus")), paste("MetaPhlAn4 Relative Abundance"))) +
  ggsci::scale_fill_lancet() +
  ggsci::scale_color_lancet() +
  guides(
    fill = guide_legend("Outcome"),
    color = guide_legend("Outcome",
      override.aes = aes(label = "")
    )
  ) +
  scale_y_continuous(
    breaks = seq(0, 1, 0.1),
    expand = expansion(mult = c(0.01, 0.035)),
    labels = scales::percent_format(accuracy = 1)
  ) +
  coord_cartesian(xlim = c(1.1, 1.9))

gg_ecoc_rel_abd

pdf(
  file = "./Results/Enterococcus_Metaphlan_Outcome_30_Days_Mortality_train.pdf",
  height = 6,
  width = 7
)
gg_ecoc_rel_abd
invisible(invisible(dev.off()))

# Enterobacterales
set.seed(456)
gg_ebac_rel_abd <- phylo_rel_abd %>%
  filter(organism == "Enterobacterales") %>%
  group_by(organism) %>%
  ggplot(
    .,
    aes(
      x = thirtyday_mortality_overall,
      y = pctseqs,
      color = thirtyday_mortality_overall,
      fill = thirtyday_mortality_overall
    )
  ) +
  geom_boxplot(
    outlier.colour = NA,
    alpha = 0.35
  ) +
  geom_jitter(
    width = 0.2,
    size = 2.5,
    alpha = 0.65
  ) +
  stat_compare_means(
    comparisons = list(c("Survivor", "Non-Survivor")),
    tip.length = 0.01,
    symnum.args = symnum.args,
    method.args = list(
      alternative = "two.sided",
      exact = FALSE
    ),
    label.y = c(1.05)
  ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(
      # Top margin
      t = 5,
      # Right margin
      r = 5,
      # Bottom margin
      b = 5,
      # Left margin
      l = 5
      ),
    panel.border = eb(),
    axis.line = el(color = "black")
  ) +
  ylab(~ atop(paste(italic("Enterobacterales")), paste("MetaPhlAn4 Relative Abundance"))) +
  ggsci::scale_fill_lancet() +
  ggsci::scale_color_lancet() +
  guides(
    fill = guide_legend("Outcome"),
    color = guide_legend("Outcome",
      override.aes = aes(label = "")
    )
  ) +
  scale_y_continuous(
    breaks = seq(0, 1, 0.1),
    expand = expansion(mult = c(0.01, 0.035)),
    labels = scales::percent_format(accuracy = 1)
  ) +
  coord_cartesian(xlim = c(1.1, 1.9))

gg_ebac_rel_abd

pdf(
  file = "./Results/Enterobacterales_Metaphlan_Outcome_30_Days_Mortality_train.pdf",
  height = 6,
  width = 7
)
gg_ebac_rel_abd
invisible(invisible(dev.off()))

dominations <- phylo_rel_abd %>%
  group_by(shotgunSeq_id) %>%
  mutate(
    enterococcus_domination = ifelse(organism == "Enterococcus" &
      pctseqs >= 0.30, 1, 0),
    enterobacterales_domination = ifelse(organism == "Enterobacterales" &
      pctseqs >= 0.05, 1, 0)
  ) %>%
  pivot_longer(!c(shotgunSeq_id:pctseqs),
    names_to = "dominations",
    values_to = "outcome"
  ) %>%
  filter(
    grepl(x = organism, pattern = "Enterococcus") &
      grepl(x = dominations, pattern = "enterococcus_domination") |
      grepl(x = organism, pattern = "Enterobacterales") &
        grepl(x = dominations, pattern = "enterobacterales_domination")
  ) %>%
  pivot_wider(!c(organism, pctseqs),
    names_from = "dominations",
    values_from = "outcome"
  )

micu_nocovid_first_samps_omics_light_filt_wide <-
  micu_nocovid_first_samps_omics_light %>%
  left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
  mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus, "-", Species)) %>%
  pivot_wider(
    id_cols = c(unique_id, compound, mvalue__mM),
    names_from = "Genus",
    values_from = "pctseqs",
    values_fn = sum
  ) %>%
  relocate(compound, mvalue__mM, .before = unique_id) %>%
  pivot_wider(
    id_cols = c(unique_id:last_col()),
    names_from = "compound",
    values_from = "mvalue__mM"
  ) %>%
  left_join(
    dominations %>%
      select(
        shotgunSeq_id,
        enterococcus_domination,
        enterobacterales_domination
      ) %>%
      left_join(
        micu_new_nocovid_oc %>%
          ungroup() %>%
          select(unique_id, shotgunSeq_id) %>%
          distinct(shotgunSeq_id, .keep_all = TRUE),
        by = "shotgunSeq_id"
      )
  )

# Enterobacterales + Enterococcus
set.seed(456)
gg_ecoc_ebac_rel_abd <- phylo_rel_abd %>%
  mutate(
    comps = paste(thirtyday_mortality_overall, organism, sep = "\n"),
    comps = factor(
      comps,
      levels = c(
        "Survivor\nEnterococcus",
        "Non-Survivor\nEnterococcus",
        "Survivor\nEnterobacterales",
        "Non-Survivor\nEnterobacterales"
      )
    )
  ) %>%
  ggplot(
    .,
    aes(
      x = comps,
      y = pctseqs,
      color = thirtyday_mortality_overall,
      fill = thirtyday_mortality_overall
    )
  ) +
  geom_boxplot(
    outlier.colour = NA,
    alpha = 0.35
  ) +
  geom_jitter(
    width = 0.2,
    size = 2.5,
    alpha = 0.65
  ) +
  stat_compare_means(
    comparisons = list(
      c("Survivor\nEnterococcus", "Non-Survivor\nEnterococcus"),
      c(
        "Survivor\nEnterobacterales",
        "Non-Survivor\nEnterobacterales"
      )
    ),
    tip.length = 0.05,
    bracket.size = 0.5,
    symnum.args = symnum.args,
    method.args = list(
      alternative = "two.sided",
      exact = FALSE
    ),
    label.y = c(1.05)
  ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 16, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 14, color = "black"),
    plot.margin = margin(
      # Top margin
      t = 5,
      # Right margin
      r = 5,
      # Bottom margin
      b = 5,
      # Left margin
      l = 5
    ),
    panel.border = eb(),
    axis.line = el(color = "black"),
    legend.position = "none"
  ) +
  ylab(paste("MetaPhlAn4 Relative Abundance\n")) +
  ggsci::scale_fill_lancet() +
  ggsci::scale_color_lancet() +
  guides(
    fill = guide_legend("Outcome"),
    color = guide_legend("Outcome",
      override.aes = aes(label = "")
    )
  ) +
  scale_y_continuous(
    breaks = seq(0, 1, 0.1),
    expand = expansion(mult = c(0.01, 0.05)),
    labels = scales::percent_format(accuracy = 1)
  ) +
  coord_cartesian(xlim = c(1.1, 3.9))

gg_ecoc_ebac_rel_abd

pdf(
  file = "./Results/Enterococcus_Enterobacterales_Metaphlan_Outcome_30_Days_Mortality_train.pdf",
  height = 3.5,
  width = 8
)
gg_ecoc_ebac_rel_abd
invisible(dev.off())

MaAsLin2 Differential Abundance

# Aggregate to Family level
maaslin_mat <- t_metaphlan_micu_nocovid %>%
  distinct() %>%
  left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
  mutate(Family = ifelse(Family == "", str_extract(Species, pattern = "([^\\s]+)"), Family)) %>%
  group_by(shotgunSeq_id, metabolomicsID, db, Family) %>%
  summarise(pctseqs = sum(pctseqs)) %>%
  pivot_wider(
    id_cols = c(shotgunSeq_id),
    names_from = "Family",
    values_from = "pctseqs",
    values_fill = 0,
    values_fn = sum
  ) %>%
  column_to_rownames(var = "shotgunSeq_id")

# Run Maaslin without covariates
set.seed(123)
maaslin_no_covariates <- Maaslin2(
  input_data = maaslin_mat,
  input_metadata = data.frame(
    t_metaphlan_micu_nocovid_mat %>%
      rownames_to_column(var = "shotgunSeq_id") %>%
      select(shotgunSeq_id) %>%
      left_join(micu_new_nocovid_oc %>%
        select(shotgunSeq_id, unique_id)) %>%
      left_join(
        tableone_nocovid_df_filt %>%
          labelled::remove_labels() %>%
          janitor::clean_names() %>%
          mutate(
            race_factor = as.character(race_factor),
            race_factor = ifelse(
              race_factor %in% c("Asian", "More than one race"),
              "Other",
              race_factor
            )
          )
      ) %>%
      column_to_rownames(var = "shotgunSeq_id") %>%
      select(-c(unique_id)) %>%
      mutate(race_factor = as.factor(race_factor))
  ),
  output = "/Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base/",
  min_abundance = 0.001,
  # At least 0.1% abundance
  min_prevalence = 0.10,
  # Taxa found in at least 10% of samples
  min_variance = -Inf,
  normalization = "NONE",
  transform = "NONE",
  analysis_method = "LM",
  max_significance = 0.05,
  # p.adj <= 0.05 (qval = padjust)
  random_effects = NULL,
  fixed_effects = c("thirtyday_mortality_overall"),
  correction = "BH",
  standardize = TRUE,
  cores = 12,
  plot_heatmap = TRUE,
  plot_scatter = TRUE,
  heatmap_first_n = 50,
  reference = c("thirtyday_mortality_overall,Survivor")
)
## [1] "Warning: Deleting existing log file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//maaslin2.log"
## 2024-06-03 14:34:05 INFO::Writing function arguments to log file
## 2024-06-03 14:34:05 INFO::Verifying options selected are valid
## 2024-06-03 14:34:05 INFO::Determining format of input files
## 2024-06-03 14:34:05 INFO::Input format is data samples as rows and metadata samples as rows
## 2024-06-03 14:34:05 INFO::Formula for fixed effects: expr ~  thirtyday_mortality_overall
## 2024-06-03 14:34:05 INFO::Filter data based on min abundance and min prevalence
## 2024-06-03 14:34:05 INFO::Total samples in data: 147
## 2024-06-03 14:34:05 INFO::Min samples required with min abundance for a feature not to be filtered: 14.700000
## 2024-06-03 14:34:05 INFO::Total filtered features: 55
## 2024-06-03 14:34:05 INFO::Filtered feature names from abundance and prevalence filtering: Candidatus.Borkfalkiaceae, Mycobacteriaceae, Pseudomonadaceae, Coriobacteriia, Firmicutes, Synergistaceae, Aminobacteriaceae, Candidatus.Nanoperiomorbaceae, Candidatus.Nanosynbacteraceae, Candidatus.Saccharimonadaceae, Carnobacteriaceae, Corynebacteriaceae, Fusobacteriaceae, Gemella, Kytococcaceae, Peptococcaceae, Promicromonosporaceae, Propionibacteriaceae, Selenomonadaceae, TM7, Morganellaceae, Turicibacteraceae, Ezakiella, Campylobacteraceae, Bacillota, Flavobacteriales, Dethiosulfovibrionaceae, Massilistercora, Tissierellaceae, Pasteurellaceae, Moraxellaceae, Selenomonadales, Dysgonomonadaceae, Alcaligenaceae, Bacilli, Yeguiaceae, Metamycoplasmataceae, Lawsonellaceae, Opitutales, Negativibacillus, Porphyromonadaceae, Tropherymataceae, Bacteroidales, Sporanaerobacteraceae, Leptotrichiaceae, Neisseriaceae, Aerococcaceae, Lentisphaeria, Fenollaria, Acetobacteraceae, Xanthomonadaceae, Victivallaceae, Oxalobacteraceae, Comamonadaceae, Nitrobacteraceae
## 2024-06-03 14:34:05 INFO::Total filtered features with variance filtering: 0
## 2024-06-03 14:34:05 INFO::Filtered feature names from variance filtering:
## 2024-06-03 14:34:05 INFO::Running selected normalization method: NONE
## 2024-06-03 14:34:05 INFO::Applying z-score to standardize continuous metadata
## 2024-06-03 14:34:05 INFO::Running selected transform method: NONE
## 2024-06-03 14:34:05 INFO::Running selected analysis method: LM
## 2024-06-03 14:34:05 INFO::Creating cluster of 12 R processes
## 2024-06-03 14:34:14 INFO::Counting total values for each feature
## 2024-06-03 14:34:14 WARNING::Deleting existing residuals file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//residuals.rds
## 2024-06-03 14:34:14 INFO::Writing residuals to file /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//residuals.rds
## 2024-06-03 14:34:14 WARNING::Deleting existing fitted file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//fitted.rds
## 2024-06-03 14:34:14 INFO::Writing fitted values to file /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//fitted.rds
## 2024-06-03 14:34:14 INFO::Writing all results to file (ordered by increasing q-values): /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//all_results.tsv
## 2024-06-03 14:34:14 INFO::Writing the significant results (those which are less than or equal to the threshold of 0.050000 ) to file (ordered by increasing q-values): /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//significant_results.tsv
## 2024-06-03 14:34:14 INFO::Writing heatmap of significant results to file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base//heatmap.pdf
## [1] "There are no associations to plot!"
## 2024-06-03 14:34:14 INFO::Writing association plots (one for each significant association) to output folder: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Base/
## [1] "There are no associations to plot!"
# Run Maaslin2 with Covariates
set.seed(123)
maaslin_model <- Maaslin2(
  input_data = maaslin_mat,
  input_metadata = data.frame(
    t_metaphlan_micu_nocovid_mat %>%
      rownames_to_column(var = "shotgunSeq_id") %>%
      select(shotgunSeq_id) %>%
      left_join(micu_new_nocovid_oc %>%
        select(shotgunSeq_id, unique_id)) %>%
      left_join(
        tableone_nocovid_df_filt %>%
          labelled::remove_labels() %>%
          janitor::clean_names() %>%
          mutate(
            race_factor = as.character(race_factor),
            race_factor = ifelse(
              race_factor %in% c("Asian", "More than one race"),
              "Other",
              race_factor
            )
          )
      ) %>%
      column_to_rownames(var = "shotgunSeq_id") %>%
      select(-c(unique_id)) %>%
      mutate(race_factor = as.factor(race_factor))
  ),
  output = "/Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate",
  min_abundance = 0.001,
  # At least 0.1% abundance
  min_prevalence = 0.10,
  # Taxa found in at least 10% of samples
  min_variance = -Inf,
  normalization = "NONE",
  transform = "NONE",
  analysis_method = "LM",
  max_significance = 0.05,
  # p.adj <= 0.05 (qval = padjust)
  random_effects = NULL,
  fixed_effects = c(
    "thirtyday_mortality_overall",
    "sex_factor",
    "age",
    "cci_total_sc",
    "ards_factor",
    "sepsis_factor",
    "sofa_score_total",
    "day_collected",
    "race_factor",
    "diet"
  ),
  correction = "BH",
  standardize = TRUE,
  cores = 12,
  plot_heatmap = TRUE,
  plot_scatter = TRUE,
  heatmap_first_n = 50,
  reference = c("thirtyday_mortality_overall,Survivor", "race_factor,White")
)
## [1] "Warning: Deleting existing log file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/maaslin2.log"
## 2024-06-03 14:34:14 INFO::Writing function arguments to log file
## 2024-06-03 14:34:14 INFO::Verifying options selected are valid
## 2024-06-03 14:34:14 INFO::Determining format of input files
## 2024-06-03 14:34:14 INFO::Input format is data samples as rows and metadata samples as rows
## 2024-06-03 14:34:14 INFO::Formula for fixed effects: expr ~  thirtyday_mortality_overall + sex_factor + age + cci_total_sc + ards_factor + sepsis_factor + sofa_score_total + day_collected + race_factor + diet
## 2024-06-03 14:34:14 INFO::Filter data based on min abundance and min prevalence
## 2024-06-03 14:34:14 INFO::Total samples in data: 147
## 2024-06-03 14:34:14 INFO::Min samples required with min abundance for a feature not to be filtered: 14.700000
## 2024-06-03 14:34:14 INFO::Total filtered features: 55
## 2024-06-03 14:34:14 INFO::Filtered feature names from abundance and prevalence filtering: Candidatus.Borkfalkiaceae, Mycobacteriaceae, Pseudomonadaceae, Coriobacteriia, Firmicutes, Synergistaceae, Aminobacteriaceae, Candidatus.Nanoperiomorbaceae, Candidatus.Nanosynbacteraceae, Candidatus.Saccharimonadaceae, Carnobacteriaceae, Corynebacteriaceae, Fusobacteriaceae, Gemella, Kytococcaceae, Peptococcaceae, Promicromonosporaceae, Propionibacteriaceae, Selenomonadaceae, TM7, Morganellaceae, Turicibacteraceae, Ezakiella, Campylobacteraceae, Bacillota, Flavobacteriales, Dethiosulfovibrionaceae, Massilistercora, Tissierellaceae, Pasteurellaceae, Moraxellaceae, Selenomonadales, Dysgonomonadaceae, Alcaligenaceae, Bacilli, Yeguiaceae, Metamycoplasmataceae, Lawsonellaceae, Opitutales, Negativibacillus, Porphyromonadaceae, Tropherymataceae, Bacteroidales, Sporanaerobacteraceae, Leptotrichiaceae, Neisseriaceae, Aerococcaceae, Lentisphaeria, Fenollaria, Acetobacteraceae, Xanthomonadaceae, Victivallaceae, Oxalobacteraceae, Comamonadaceae, Nitrobacteraceae
## 2024-06-03 14:34:14 INFO::Total filtered features with variance filtering: 0
## 2024-06-03 14:34:14 INFO::Filtered feature names from variance filtering:
## 2024-06-03 14:34:14 INFO::Running selected normalization method: NONE
## 2024-06-03 14:34:14 INFO::Applying z-score to standardize continuous metadata
## 2024-06-03 14:34:14 INFO::Running selected transform method: NONE
## 2024-06-03 14:34:14 INFO::Running selected analysis method: LM
## 2024-06-03 14:34:14 INFO::Creating cluster of 12 R processes
## 2024-06-03 14:34:32 INFO::Counting total values for each feature
## 2024-06-03 14:34:32 WARNING::Deleting existing residuals file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/residuals.rds
## 2024-06-03 14:34:32 INFO::Writing residuals to file /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/residuals.rds
## 2024-06-03 14:34:32 WARNING::Deleting existing fitted file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/fitted.rds
## 2024-06-03 14:34:32 INFO::Writing fitted values to file /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/fitted.rds
## 2024-06-03 14:34:32 INFO::Writing all results to file (ordered by increasing q-values): /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/all_results.tsv
## 2024-06-03 14:34:32 INFO::Writing the significant results (those which are less than or equal to the threshold of 0.050000 ) to file (ordered by increasing q-values): /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/significant_results.tsv
## 2024-06-03 14:34:32 INFO::Writing heatmap of significant results to file: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate/heatmap.pdf
## 2024-06-03 14:34:32 INFO::Writing association plots (one for each significant association) to output folder: /Users/nick/Documents/GitHub/DFI-Bioinformatics/Medical_Intensive_Care_Unit/Results/Maaslin_Covariate
## 2024-06-03 14:34:32 INFO::Plotting associations from most to least significant, grouped by metadata
## 2024-06-03 14:34:32 INFO::Plotting data for metadata number 1, day_collected
## 2024-06-03 14:34:32 INFO::Creating scatter plot for continuous data, day_collected vs Coprobacillaceae
## 2024-06-03 14:34:34 INFO::Plotting data for metadata number 2, sofa_score_total
## 2024-06-03 14:34:34 INFO::Creating scatter plot for continuous data, sofa_score_total vs Lachnospiraceae
## 2024-06-03 14:34:34 INFO::Creating scatter plot for continuous data, sofa_score_total vs Oscillospiraceae
## 2024-06-03 14:34:34 INFO::Creating scatter plot for continuous data, sofa_score_total vs Lactobacillaceae
## 2024-06-03 14:34:35 INFO::Plotting data for metadata number 3, race_factor
## 2024-06-03 14:34:35 INFO::Creating boxplot for categorical data, race_factor vs Rikenellaceae

# Manual p-value adjustment for specific comparsisons
maaslin2_all_results <- maaslin_model$results

maaslin2_results <-
  maaslin2_all_results %>% filter(metadata == "thirtyday_mortality_overall") # Discard covariate associations

maaslin2_results$qval <-
  p.adjust(maaslin2_results$pval, method = "BH") # FDR correction using 'BH'

Quant Metabolites Boxplots

metab_quant_converted <- metab_quant_imp_tot_mM %>%
  right_join(
    micu_new_nocovid_oc %>%
      select(metabolomicsID, thirtyday_mortality_overall, sepsis.factor)
  ) %>%
  select(
    metabolomicsID,
    compound,
    mvalue__mM,
    thirtyday_mortality_overall,
    sepsis.factor
  ) %>%
  drop_na(compound)

metab_boxplot <-
  metab_quant_converted %>%
  ungroup() %>%
  mutate(
    class = case_when(
      compound %in% c(
        "taurocholic acid",
        "glycocholic acid",
        "allocholic acid",
        "alpha-muricholic acid",
        "beta-muricholic acid",
        "omega-muricholic acid",
        "ursocholic acid",
        "glycochenodeoxycholic acid",
        "taurochenodeoxycholic acid"
      ) ~ "Conjugated Primary Bile Acid",
      compound %in% c("cholic acid", "chenodeoxycholic acid") ~ "Primary Bile Acid",
      compound %in% c(
        "3-oxolithocholic acid",
        "alloisolithocholic acid",
        "deoxycholic acid",
        "isodeoxycholic acid",
        "lithocholic acid",
        "ursodeoxycholic acid"
      ) ~ "Secondary Bile Acid",
      compound %in% c(
        "threonine",
        "glycine",
        "tyrosine",
        "tyramine",
        "serine",
        "leucine",
        "isoleucine",
        "valine",
        "phenylalanine",
        "alanine",
        "proline",
        "aspartate",
        "methionine",
        "glutamate",
        "lysine",
        "cysteine",
        "tryptophan"
      ) ~ "Amino Acid",
      compound %in% c(
        "acetate",
        "butyrate",
        "succinate",
        "propionate",
        "5-aminovalerate"
      ) ~ "Fatty Acid",
      compound %in% c(
        "kynurenic acid",
        "anthranilic acid",
        "kynurenine",
        "tryptamine"
      ) ~ "Kynurenine Metabolite",
      compound == "desaminotyrosine" ~ "Phenolic Aromatic",
      compound == "niacin" ~ "B-Vitamin",
      TRUE ~ "Indole"
    ),
    compound = case_when(
      class == "Conjugated Primary Bile Acid" ~ paste(str_to_title(compound), "(1˚Conj. BA)"),
      class == "Primary Bile Acid" ~ paste(str_to_title(compound), "(1˚ BA)"),
      class == "Secondary Bile Acid" ~ paste(str_to_title(compound), "(2˚ BA)"),
      class == "Fatty Acid" & compound == "succinate" ~ paste(str_to_title(compound), "(FA)"),
      class == "Fatty Acid" ~ paste(str_to_title(compound), "(SCFA)"),
      class == "Amino Acid" ~ paste(str_to_title(compound), "(AA)"),
      class == "Phenolic Aromatic" ~ paste(str_to_title(compound), "(Phen. Arom.)"),
      class == "Indole" ~ paste(str_to_title(compound), "(Indole)"),
      class == "Kynurenine Metabolite" ~ paste(str_to_title(compound), "(Kyn. Metab.)"),
      class == "B-Vitamin" ~ paste(str_to_title(compound), "(B-Vitamin)")
    )
  ) %>%
  drop_na() %>%
  mutate(
    compound = factor(
      compound,
      levels = c(
        "Acetate (SCFA)",
        "Butyrate (SCFA)",
        "Propionate (SCFA)",
        "Succinate (FA)",
        "5-Aminovalerate (SCFA)",
        "Chenodeoxycholic Acid (1˚ BA)",
        "Cholic Acid (1˚ BA)",
        "Allocholic Acid (1˚Conj. BA)",
        "Alpha-Muricholic Acid (1˚Conj. BA)",
        "Beta-Muricholic Acid (1˚Conj. BA)",
        "Glycochenodeoxycholic Acid (1˚Conj. BA)",
        "Glycocholic Acid (1˚Conj. BA)",
        "Omega-Muricholic Acid (1˚Conj. BA)",
        "Taurochenodeoxycholic Acid (1˚Conj. BA)",
        "Taurocholic Acid (1˚Conj. BA)",
        "Ursocholic Acid (1˚Conj. BA)",
        "3-Oxolithocholic Acid (2˚ BA)",
        "Alloisolithocholic Acid (2˚ BA)",
        "Deoxycholic Acid (2˚ BA)",
        "Isodeoxycholic Acid (2˚ BA)",
        "Lithocholic Acid (2˚ BA)",
        "Ursodeoxycholic Acid (2˚ BA)",
        "Cysteine (AA)",
        "Glycine (AA)",
        "Phenylalanine (AA)",
        "Proline (AA)",
        "Tryptophan (AA)",
        "Tyramine (AA)",
        "Tyrosine (AA)",
        "5-Hydroxyindoleacetate (Indole)",
        "Melatonin (Indole)",
        "Serotonin (Indole)",
        "Indole-3-Acetamide (Indole)",
        "Indole-3-Acetate (Indole)",
        "Indole-3-Lactate (Indole)",
        "Indole (Indole)",
        "Indole-3-Carboxaldehyde (Indole)",
        "Indole-3-Propionate (Indole)",
        "Indole-3-Acrylate (Indole)",
        "Desaminotyrosine (Phen. Arom.)",
        "Anthranilic Acid (Kyn. Metab.)",
        "Kynurenic Acid (Kyn. Metab.)",
        "Kynurenine (Kyn. Metab.)",
        "Tryptamine (Kyn. Metab.)",
        "Niacin (B-Vitamin)"
      )
    ),
    class = factor(
      class,
      levels = c(
        "Fatty Acid",
        "Primary Bile Acid",
        "Conjugated Primary Bile Acid",
        "Secondary Bile Acid",
        "Amino Acid",
        "Indole",
        "Phenolic Aromatic",
        "Kynurenine Metabolite",
        "B-Vitamin"
      )
    )
  )


metab_boxplot_stats <-
  metab_boxplot %>%
  group_by(class, compound) %>%
  rstatix::wilcox_test(
    mvalue__mM ~ thirtyday_mortality_overall,
    p.adjust.method = "none",
    alternative = "two.sided"
  ) %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  rstatix::add_significance(
    "p.adj",
    cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 0.1, 1),
    symbols = c("****", "***", "**", "*", "0.1", "ns")
  ) %>%
  mutate(
    p.adj = round(p.adj, 2),
    p.adj = ifelse(p.adj < 0.001, "p.adj < 0.001", paste("p.adj = ", round(p.adj, 2)))
  ) %>%
  add_xy_position() %>%
  mutate(y.position = log(y.position, base = 10) * 1.25)

# Boxplot for all compounds
set.seed(123) # for consistent jittering of points
gg_metab_boxplot <-
  ggboxplot(
    metab_boxplot,
    x = "thirtyday_mortality_overall",
    y = "mvalue__mM",
    fill = "thirtyday_mortality_overall",
    color = "thirtyday_mortality_overall",
    alpha = 0.65,
    outlier.shape = NA,
    facet.by = c("class", "compound")
  ) +
  theme(
    legend.text = et(size = 12, color = "black"),
    legend.title = et(size = 14, color = "black"),
    axis.text.x = eb(),
    axis.title.x = eb(),
    axis.title.y = et(size = 12, color = "black"),
    panel.border = eb(),
    strip.background = er(colour = "white", fill = "white"),
  ) +
  geom_hline(yintercept = 0) +
  geom_segment(aes(
    x = 0.35,
    y = 0,
    xend = 0.35,
    yend = Inf
  )) +
  facet_wrap(~compound, scales = "fixed") +
  stat_pvalue_manual(metab_boxplot_stats,
    label = "p.adj",
    tip.length = 0.015
  ) +
  geom_point(
    data = metab_boxplot,
    aes(x = thirtyday_mortality_overall, y = mvalue__mM, color = thirtyday_mortality_overall),
    position = position_jitter(width = 0.2),
    size = 2,
    alpha = 0.65
  ) +
  ggsci::scale_fill_lancet() +
  ggsci::scale_color_lancet() +
  scale_y_log10(
    limits = c(0.0001, 1000),
    labels = c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100", "1000"),
    breaks = c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000),
    expand = expansion(mult = c(0.1, 0.2))
  ) +
  ylab("Concentration (mM)\n") +
  labs(
    color = "Outcome",
    fill = "Outcome"
  )

gg_metab_boxplot

cairo_pdf(
  filename = "./Results/Quant_Boxplots_Total_30_Days_Mortality_train.pdf",
  width = 14,
  height = 10,
  onefile = FALSE
)
gg_metab_boxplot
invisible(invisible(dev.off()))

Cutpoint Analysis: Metabolic Dysbiosis Score

# Cutpoint dataframe
cutpoints_df <- metab_quant_imp_tot_mM %>%
  pivot_wider(
    id_cols = c(metabolomicsID),
    names_from = "compound",
    values_from = "mvalue__mM"
  ) %>%
  group_by(metabolomicsID) %>%
  pivot_longer(!c(metabolomicsID),
    names_to = "compound",
    values_to = "mvalue__mM"
  ) %>%
  right_join(micu_new_nocovid_oc %>% select(metabolomicsID, thirtyday_mortality_overall)) %>%
  group_by(compound) %>%
  mutate(n = length(compound)) %>%
  ungroup() %>%
  mutate(p = length(unique(metabolomicsID))) %>%
  mutate(
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
  )

# Optimal cutpoints
# Create function to handle any errors during map function
safe_cutpointr <- possibly(.f = cutpointr, otherwise = "Error")

# set.seed(123)
cutpoints <-
  cutpoints_df %>%
  group_by(compound) %>%
  group_map(
    ~ safe_cutpointr(
      .,
      mvalue__mM,
      thirtyday_mortality_overall_class,
      compound,
      method = maximize_metric,
      metric = youden,
      pos_class = 1,
      neg_class = 0,
      boot_runs = 10,
      boot_stratify = TRUE,
      use_midpoints = TRUE,
      na.rm = TRUE
    ),
    .keep = TRUE
  )

cutpoints_unnest <- cutpoints %>%
  map_df(as_tibble)

cutpoints_unnest %>%
  select(
    compound = subgroup,
    direction,
    optimal_cutpoint_mM = optimal_cutpoint
  ) %>%
  write.csv(., "./Results/MDScore_Cutpoints_train.csv")

# Summary table
cutpoints_unnest_summary <-
  cutpoints_unnest %>%
  group_by(subgroup, pos_class) %>%
  summarize(top_auc = max(AUC)) %>%
  filter(top_auc == max(top_auc)) %>%
  arrange(-top_auc)

# Plot top results
cutpoints_unnest %>%
  mutate(
    tvalue = as.numeric(
      str_extract(string = pos_class, pattern = "[0-9]\\.[0-9]+|[0-9]+")
    ),
    variable = gsub("\\s<=.*", "", pos_class)
  ) %>%
  separate(subgroup,
    c("group1", "group2"),
    sep = "__",
    remove = FALSE
  ) %>%
  select(-boot) %>%
  mutate(group_ratio = if_else(!is.na(group2), paste(group1, group2, sep = " : "), group1)) %>%
  arrange(desc(AUC)) %>%
  group_by(pos_class) %>%
  group_by(tvalue, variable, subgroup, group_ratio, pos_class) %>%
  summarize(top_auc = max(AUC)) %>%
  ungroup() %>%
  arrange(desc(top_auc)) %>%
  group_by(tvalue, pos_class) %>%
  arrange(pos_class, tvalue, subgroup, group_ratio) %>%
  droplevels() %>%
  mutate(variable = "Predicting Outcomes: Survivor vs Non-Survivor") %>%
  ggplot() +
  geom_bar(
    aes(
      x = reorder(group_ratio, -top_auc),
      y = top_auc,
      fill = group_ratio
    ),
    stat = "identity",
    position = "dodge"
  ) +
  geom_hline(yintercept = 0.9) +
  geom_hline(yintercept = 0.8) +
  geom_hline(yintercept = 0.7) +
  shadowtext::geom_shadowtext(
    aes(
      x = group_ratio,
      y = top_auc / 2.5,
      angle = 90,
      label = group_ratio
    ),
    size = 4
  ) +
  shadowtext::geom_shadowtext(aes(
    x = group_ratio,
    y = top_auc * 1.015,
    label = round(top_auc, 2)
  )) +
  theme_bw() +
  theme(
    panel.grid.minor = eb(),
    panel.grid.major.x = eb(),
    strip.text = et(size = 14, color = "black"),
    axis.text.y = et(size = 12, color = "black"),
    axis.text.x = eb(),
    axis.ticks.x = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    legend.title = et(size = 14, color = "black"),
    legend.text = et(
      size = 12,
      color = "black",
      hjust = 0
    ),
    legend.position = "none"
  ) +
  ggsci::scale_fill_igv() +
  xlab("\nMetabolite Concentration") +
  ylab("AUC \n") +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.1)) +
  guides(fill = guide_legend(ncol = 1)) +
  labs(fill = "Metabolite Concentration") +
  ggtitle("Predicting MICU Outcomes: Survivor vs Non-Survivor") +
  facet_grid(~variable)

ggsave(
  "./Results/Cutpoint_AUC_30_Days_Mortality_train.pdf",
  height = 6,
  width = 12,
  units = "in"
)

# Build dataframe to use cutpoints
cutpoints_results <-
  cutpoints_df %>%
  left_join(
    cutpoints_unnest %>%
      dplyr::rename(compound = subgroup) %>%
      select(compound, direction, optimal_cutpoint)
  ) %>%
  mutate(
    cutpoint_prediction = case_when(
      direction == "<=" & mvalue__mM <= optimal_cutpoint ~ 1,
      direction == "<=" &
        mvalue__mM > optimal_cutpoint ~ 0,
      direction == ">=" &
        mvalue__mM >= optimal_cutpoint ~ 1,
      direction == ">=" &
        mvalue__mM < optimal_cutpoint ~ 0
    )
  ) %>%
  group_by(metabolomicsID, compound) %>%
  mutate(md_score = sum(cutpoint_prediction)) %>%
  dplyr::slice(1) %>%
  pivot_wider(
    id_cols = c(thirtyday_mortality_overall, metabolomicsID),
    names_from = "compound",
    values_from = "cutpoint_prediction"
  ) %>%
  column_to_rownames(var = "metabolomicsID") %>%
  relocate(thirtyday_mortality_overall, .after = last_col()) %>%
  mutate(thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall))

# Use Ridge to find most predictive cutpoints
set.seed(564)
cutpoint_ridge <-
  cv.glmnet(
    x = cutpoints_results %>% select(-thirtyday_mortality_overall) %>% as.matrix(),
    y = factor(cutpoints_results$thirtyday_mortality_overall, labels = c(0, 1)),
    family = "binomial",
    type.measure = "auc",
    nfolds = 10,
    alpha = 0,
  )

# Find optimal lambda value that minimizes test MSE
cutpoint_best_lambda <- cutpoint_ridge$lambda.min
cutpoint_best_lambda
## [1] 0.03351331
# Produce plot of test MSE by lambda value
plot(cutpoint_ridge)

# Find coefficients of best model
cutpoint_best_ridge <-
  glmnet(
    x = cutpoints_results %>% select(-thirtyday_mortality_overall) %>% as.matrix(),
    y = factor(cutpoints_results$thirtyday_mortality_overall, labels = c(0, 1)),
    family = "binomial",
    standardize = FALSE,
    alpha = 0,
    lambda = cutpoint_best_lambda
  )

# Save MDScore CSV in order of ridge regression importance
as.matrix(coef(cutpoint_best_ridge)) %>%
  as.data.frame() %>%
  rownames_to_column(var = "compound") %>%
  filter(compound != "(Intercept)") %>%
  arrange(desc(abs(s0))) %>%
  dplyr::rename(beta_coefficient = s0) %>% 
  left_join(
    cutpoints_unnest %>%
      select(
        compound = subgroup,
        direction,
        optimal_cutpoint_mM = optimal_cutpoint
      )
  ) %>% 
write.csv(., "./Results/MDScore_Cutpoints_train_ridge_order.csv")

min_loop_cmpds <- 2
max_loop_cmpds <-
  ncol(cutpoints_results %>% select(-thirtyday_mortality_overall) %>% as.matrix())
roc_loop_df <- NULL

for (i in seq(min_loop_cmpds, max_loop_cmpds, 1)) {
  # i = 15
  top_roc_cmpds <- as.matrix(coef(cutpoint_best_ridge)) %>%
    as.data.frame() %>%
    rownames_to_column(var = "compound") %>%
    filter(compound != "(Intercept)") %>%
    arrange(desc(abs(s0))) %>%
    slice_max(abs(s0), n = i)

  top_roc_cmpds_temp <-
    paste0(
      top_roc_cmpds %>% mutate(compound = gsub(
        x = compound,
        pattern = " ",
        replacement = "-"
      )) %>% pull(compound),
      sep = "_",
      collapse = ""
    )

  top_roc_cmpds_temp2 <-
    paste0(
      top_roc_cmpds %>% mutate(compound = gsub(
        x = compound,
        pattern = " ",
        replacement = "-"
      )) %>% pull(compound),
      sep = "\n",
      collapse = ""
    )

  cutpoints_results_var_slct <-
    cutpoints_df %>%
    filter(compound %in% top_roc_cmpds$compound) %>%
    left_join(
      cutpoints_unnest %>%
        dplyr::rename(compound = subgroup) %>%
        select(compound, direction, optimal_cutpoint)
    ) %>%
    mutate(
      cutpoint_prediction = case_when(
        direction == "<=" & mvalue__mM <= optimal_cutpoint ~ 1,
        direction == "<=" & mvalue__mM > optimal_cutpoint ~ 0,
        direction == ">=" & mvalue__mM >= optimal_cutpoint ~ 1,
        direction == ">=" & mvalue__mM < optimal_cutpoint ~ 0
      )
    ) %>%
    group_by(metabolomicsID, thirtyday_mortality_overall) %>%
    summarize(md_score = sum(cutpoint_prediction))

  # ROC curve for MD Score using training data
  pROC_obj <- pROC::roc(
    cutpoints_results_var_slct$thirtyday_mortality_overall,
    cutpoints_results_var_slct$md_score,
    smoothed = FALSE,
    ci = TRUE,
    plot = FALSE,
    auc.polygon = TRUE,
    best.method = TRUE,
    print.auc = TRUE,
    print.auc.col = "black",
    col = "#2F472F",
    auc.polygon.border = "black",
    auc.polygon.col = "gray65",
    print.thres.best.method = "youden"
  )

  loop_auc <- pROC::auc(
    pROC::roc(
      cutpoints_results_var_slct$thirtyday_mortality_overall,
      cutpoints_results_var_slct$md_score,
      smoothed = FALSE,
      ci = TRUE,
      plot = FALSE,
      auc.polygon = TRUE,
      best.method = TRUE,
      print.auc = TRUE,
      print.auc.col = "black",
      col = "#2F472F",
      auc.polygon.border = "black",
      auc.polygon.col = "gray65",
      print.thres.best.method = "youden"
    )
  )[1]

  coordinates <-
    cbind(data.frame(auc = loop_auc), coords(
      pROC_obj,
      "best",
      ret = c("auc", "threshold", "accuracy", "sens", "spec", "ppv", "npv")
    ))

  pROC::roc(
    cutpoints_results_var_slct$thirtyday_mortality_overall,
    cutpoints_results_var_slct$md_score,
    smooth = FALSE,
    ci = TRUE,
    plot = TRUE,
    auc.polygon = TRUE,
    print.auc = TRUE,
    print.auc.col = "black",
    col = "#2F472F",
    auc.polygon.border = "black",
    auc.polygon.col = "gray65",
    print.thres.best.method = "youden"
  )

  text(
    paste("ACC:", round(coordinates$accuracy, 3) * 100, "%"),
    x = 0.5,
    y = 0.45,
    adj = 0
  )
  text(paste("PPV:", round(coordinates$ppv, 2)),
    x = 0.5,
    y = 0.41,
    adj = 0
  )
  text(paste("NPV:", round(coordinates$npv, 2)),
    x = 0.5,
    y = 0.37,
    adj = 0
  )
  text(
    paste("Threshold:", round(coordinates$threshold, 2)),
    x = 0.5,
    y = 0.33,
    adj = 0
  )
  text(
    paste("i:", i, "\n", top_roc_cmpds_temp2),
    x = 1.1,
    y = 0.6,
    adj = 0
  )

  roc_plot <- grDevices::recordPlot()

  cairo_pdf(
    paste0(
      "./Results/ROC_curve_cutpoint_30_Day_Mortality_",
      i,
      ".pdf"
    ),
    width = 8,
    height = 6
  )
  pROC_obj <- pROC::roc(
    cutpoints_results_var_slct$thirtyday_mortality_overall,
    cutpoints_results_var_slct$md_score,
    smooth = FALSE,
    ci = TRUE,
    plot = TRUE,
    auc.polygon = TRUE,
    print.auc = TRUE,
    print.auc.col = "black",
    col = "#2F472F",
    auc.polygon.border = "black",
    auc.polygon.col = "gray65",
    print.thres.best.method = "youden"
  )
  text(
    paste("ACC:", round(coordinates$accuracy, 3) * 100, "%"),
    x = 0.5,
    y = 0.45,
    adj = 0
  )
  text(paste("PPV:", round(coordinates$ppv, 2)),
    x = 0.5,
    y = 0.41,
    adj = 0
  )
  text(paste("NPV:", round(coordinates$npv, 2)),
    x = 0.5,
    y = 0.37,
    adj = 0
  )
  text(
    paste("Threshold:", round(coordinates$threshold, 2)),
    x = 0.5,
    y = 0.33,
    adj = 0
  )
  text(
    paste("i:", i, "\n", top_roc_cmpds_temp2),
    x = 1.1,
    y = 0.6,
    adj = 0
  )

  invisible(dev.off())

  # Build looped csv of all model metrics and their compounds
  roc_loop_df <- base::rbind(
    roc_loop_df,
    coordinates %>%
      mutate(
        i = i,
        compounds = top_roc_cmpds_temp
      ) %>%
      mutate(across(auc:npv, \(x) round(x, 3))) %>%
      rowid_to_column() %>%
      separate(
        col = compounds,
        into = paste("compound", seq(1, max_loop_cmpds, 1)),
        sep = "_"
      )
  )
}

# Output to CSV files
write.csv(
  roc_loop_df,
  "./Results/ROC_Results_Ridge_Cutpoint_train.csv"
)

cutpoints_roc_loop <-
  roc_loop_df %>%
  group_by(i) %>% # i is coming from the last iteration of the loop above
  slice_max(threshold) %>% # This is because the i = 2 returns both Inf and -Inf values for the threshold
  pivot_longer(
    cols = !c(i, rowid, `compound 1`:last_col()),
    names_to = "model_parameter",
    values_to = "model_value"
  ) %>%
  pivot_longer(
    cols = !c(i, rowid, model_parameter, model_value),
    names_to = "compound_id",
    values_to = "compound"
  ) %>%
  drop_na() %>%
  filter(grepl(compound, pattern = "\\w+")) %>%
  mutate(compound = gsub(
    x = compound,
    pattern = "-acid",
    replacement = " acid"
  )) %>%
  dplyr::rename("number_of_compounds" = i) %>%
  filter(model_parameter == "threshold") %>% 
  group_by(number_of_compounds, compound) %>% 
  dplyr::slice(1) %>% 
  ungroup() %>% 
  left_join(
    cutpoints_df %>% select(
      metabolomicsID,
      compound,
      mvalue__mM,
      thirtyday_mortality_overall,
      thirtyday_mortality_overall_class
    )
  ) %>% 
  left_join(
    cutpoints_unnest %>%
      dplyr::rename(compound = subgroup) %>%
      select(compound, direction, optimal_cutpoint)
  ) %>%
  mutate(
    cutpoint_prediction = case_when(
      direction == "<=" & mvalue__mM <= optimal_cutpoint ~ 1,
      direction == "<=" & mvalue__mM > optimal_cutpoint ~ 0,
      direction == ">=" & mvalue__mM >= optimal_cutpoint ~ 1,
      direction == ">=" & mvalue__mM < optimal_cutpoint ~ 0
    )
  ) %>%
  group_by(number_of_compounds, metabolomicsID, model_parameter, model_value, thirtyday_mortality_overall) %>%
  summarize(md_score = sum(cutpoint_prediction)) %>% 
  mutate(grouped_md_score = ifelse(md_score > model_value, "High Score", "Low Score"))

# Kaplan Meier
km_nocovid <- micu_new_nocovid_oc %>%
  select(
    unique_id,
    sampleid,
    metabolomicsID,
    days_until_death_overall,
    censoring_thirtyday_mortality_overall,
    thirtyday_mortality_overall
  ) %>%
  ungroup() %>%
  mutate(
    surv_days = ifelse(
      is.na(days_until_death_overall) &
        thirtyday_mortality_overall == "Survivor",
      censoring_thirtyday_mortality_overall,
      days_until_death_overall
    ),
    surv_days = ifelse(
      is.na(surv_days) &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    surv_days = ifelse(
      surv_days > 30 &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
  ) %>% # Non-Survivor is 1
  left_join(
    cutpoints_roc_loop %>% select(number_of_compounds, metabolomicsID, grouped_md_score, md_score)
  ) %>%
  mutate(grouped_md_score_class = factor(
    grouped_md_score,
    levels = c("Low Score", "High Score"),
    labels = c(1, 0)
  ))                      # Low Score = 1, High Score = 0, due to area = Low Score - High Score)

km_loop <-
  km_nocovid %>%
  left_join(
    tableone_nocovid_df_filt %>% select(unique_id, age:day_collected, penicillins:last_col())
  ) %>%
  group_by(number_of_compounds) %>%
  group_map(~ (
    rmst2(
      time = .x$surv_days,
      status = .x$thirtyday_mortality_overall_class,
      arm = .x$grouped_md_score_class,
      # covariates = c(
      #   .x$age,
      #   .x$sex_factor,
      #   # .x$race_factor, # Check if commas are the problem
      #   .x$cci_total_sc,
      #   .x$ards_factor,
      #   .x$sepsis_factor,
      #   .x$sofa_score_total,
      #   .x$ap2_total_score,
      #   .x$day_collected,
      #   .x$diet
      # ),
      tau = 30
    )
  )) 

km_loop_df <- as.data.frame(do.call(rbind, km_loop))

km_loop_df2 <- NULL

for (i in seq(1,30,1)){
  km_loop_df2 <- janitor::clean_names(rbind(km_loop_df2, km_loop_df[["unadjusted.result"]][i][1][[1]], i))
}

km_loop_df2 <-
  as.data.frame(km_loop_df2) %>% 
  rownames_to_column(var = "variable") %>% 
  filter(variable %in% c("rmst_arm_1_arm_0",
                         "rmst_arm_1_arm_0_2_2",
                         "rmst_arm_1_arm_0_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2",
                         "rmst_arm_1_arm_0_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2_2")) %>% 
  rownames_to_column(var = "number_of_compounds") %>% 
  mutate(number_of_compounds = as.numeric(number_of_compounds) + 1)

# Plot results
# Choose optimal model (based on Accuracy, AUC, NPV, PPV, and complexity)
optimal_components <- 13

hline_dat <-
  data.frame(
    model_parameter = c("AUC", "Accuracy", "Sensitivity", "Specificity"),
    value = c(
      # -1 because of i = 1 being 2 compounds etc.
      roc_loop_df$auc[i = optimal_components - 1],
      roc_loop_df$accuracy[i = optimal_components - 1],
      roc_loop_df$sensitivity[i = optimal_components - 1],
      roc_loop_df$specificity[i = optimal_components - 1]
    )
  )

# Plot results
gg_roc_loop_metrics <-
  roc_loop_df %>%
  group_by(i) %>% # i is coming from the last iteration of the loop above
  slice_max(accuracy) %>% # This is because the i = 2 returns both Inf and -Inf values for the threshold
  pivot_longer(
    cols = !c(i, rowid, `compound 1`:last_col()),
    names_to = "model_parameter",
    values_to = "model_value"
  ) %>%
  pivot_longer(
    cols = !c(i, rowid, model_parameter, model_value),
    names_to = "compound_id",
    values_to = "compound"
  ) %>%
  drop_na() %>%
  filter(grepl(compound, pattern = "\\w+")) %>%
  dplyr::rename("number_of_compounds" = i) %>%
  filter(is.finite(model_value)) %>%
  distinct(rowid, number_of_compounds, model_parameter, model_value) %>%
  mutate(model_parameter = ifelse(
    nchar(model_parameter) > 3,
    str_to_title(model_parameter),
    str_to_upper(model_parameter)
  )) %>%
  group_by(model_parameter) %>%
  mutate(
    model_value_lab = ifelse(model_parameter == "Threshold", NA, model_value),
    model_value_lab = sprintf("%.2f", model_value_lab),
    model_value_lab = ifelse(model_value_lab == "NA", NA, model_value_lab)
  ) %>%
  group_by(model_parameter) %>%
  mutate(
    max_parameter = max(model_value),
    max_parameter_color = ifelse(max_parameter == model_value, "max", NA)
  ) %>%
  filter(model_parameter %in% c("AUC", "Accuracy", "Sensitivity", "Specificity")) %>%
  ggpubr::ggdotchart(
    x = "number_of_compounds",
    y = "model_value",
    color = "model_parameter",
    fill = "max_parameter_color",
    label = "model_value_lab",
    add = "segment",
    sorting = "none",
    dot.size = 3.5,
    font.label = list(
      color = "black",
      size = 8,
      hjust = 2.25,
      vjust = -1
    ),
  ) +
  geom_hline(
    data = hline_dat,
    aes(yintercept = value, color = model_parameter),
    alpha = 0.5
  ) +
  annotate(
    "rect",
    xmin = optimal_components-1.5,
    xmax = optimal_components-0.5,
    ymin = -Inf,
    ymax = Inf,
    alpha = 0.2,
    fill = "grey70"
  ) +
  theme(
    legend.position = "right",
    axis.text.x = eb(),
    axis.title.x = eb()
  ) +
  facet_wrap(~model_parameter, ncol = 1) +
  scale_color_manual(values = c(paletteer::paletteer_d("nbapalettes::bulls_city"), "#005076FF")) +
  guides(color = guide_legend(
    title = "Model Parameter",
    keyheight = 1.5,
    keywidth = 1.5
  )) +
  ylab("Model Metric Value\n") +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.4)),
    breaks = c(0, 0.5, 1)
  )

gg_km_loop <-
  km_loop_df2 %>%
  arrange(number_of_compounds) %>%
  select(-variable) %>% 
  mutate_all(as.numeric) %>%
  ggpubr::ggdotchart(
    .,
    x = "number_of_compounds",
    y = "est",
    sorting = "none",
    size = 3.5,
    color = "p"
  ) +
  geom_errorbar(
    aes(ymax = `upper_95`, ymin = `lower_95`),
    position = position_dodge(width = 0.8),
    width = 0.25
  ) +
  geom_hline(
    data = data.frame(value = km_loop_df2 %>% filter(number_of_compounds == optimal_components) %>% pull(est)),
    aes(yintercept = value), color = "#C75DAAFF",
    alpha = 0.5
  ) +
  annotate(
    "rect",
    xmin = optimal_components - 1.5,
    xmax = optimal_components - 0.5,
    ymin = -Inf,
    ymax = Inf,
    alpha = 0.2,
    fill = "grey70"
  ) +
  theme(
    axis.title.x = eb(),
    axis.text.x = eb(),
    legend.position = "right"
  ) +
  ylab("∆ RMST\n") +
  xlab("") +
  paletteer::scale_color_paletteer_c(trans = "log10", "grDevices::Tropic") +
  labs(color = "p-value") +
  scale_x_discrete(expand = expansion(add = c(0.6, 1)))

gg_roc_loop_compounds <-
  roc_loop_df %>%
  pivot_longer(
    cols = !c(i, rowid, `compound 1`:last_col()),
    names_to = "model_parameter",
    values_to = "model_value"
  ) %>%
  pivot_longer(
    cols = !c(i, rowid, model_parameter, model_value),
    names_to = "compound_id",
    values_to = "compound"
  ) %>%
  drop_na() %>%
  filter(grepl(compound, pattern = "\\w+")) %>%
  dplyr::rename("number_of_compounds" = i) %>%
  distinct(number_of_compounds, compound) %>%
  mutate(
    compound = str_to_title(compound),
    compound = gsub(compound, pattern = "-Acid", replacement = " Acid"),
    compound = factor(compound, levels = rev(c(
      gsub(
        gsub(
          str_to_title(top_roc_cmpds$compound),
          pattern = " ",
          replacement = "-"
        ),
        pattern = "-Acid",
        replacement = " Acid"
      )
    ))),
    number_of_unique_compounds = 1
  ) %>% # This is important to run the loop before this, so it ends with i = 30 for top_roc_cmpds
  ggpubr::ggbarplot(x = "number_of_compounds", y = "number_of_unique_compounds", fill = "compound") +
  theme( # legend.position = c(1.15,0.9981),
    # legend.justification = c("right", "top"),
    # legend.spacing.y = unit(0, "lines"),
    # legend.background = eb(),
    # panel.grid.major.x = el(color = "black")
    legend.position = "right"
  ) +
  annotate(
    "rect",
    xmin = optimal_components-1.5,
    xmax = optimal_components-0.5,
    ymin = -Inf,
    ymax = Inf,
    alpha = 0.2,
    fill = "grey70"
  ) +
  paletteer::scale_fill_paletteer_d(palette = "Polychrome::glasbey") +
  guides(fill = guide_legend(
    title = "",
    keyheight = 0.75,
    keywidth = 1,
    title.vjust = 1,
    ncol = 1
  )) +
  ylab("Number of Compounds Included in ROC\n") +
  xlab("\nNumber of Compounds Included in ROC") +
  scale_y_discrete(expand = expansion(add = c(0.1, 0.1))) +
  scale_x_discrete(expand = expansion(add = c(0.6, 1)))

# Combine plots
cairo_pdf("./Results/ROC_Loop_train.pdf", height = 12, width = 14)

gg_roc_loop_metrics / gg_km_loop/ gg_roc_loop_compounds +
  patchwork::plot_layout(heights = c(1, 0.35, 1.1))

invisible(dev.off())

Optimal Metabolic Dysbiosis Score

# KM Curves: MD Score
km_nocovid_final <- km_nocovid %>% 
  filter(number_of_compounds == optimal_components)

set.seed(123)
surv_object <-
  Surv(
    time = km_nocovid_final$surv_days,
    event = km_nocovid_final$thirtyday_mortality_overall_class
  )

fit1 <- survfit(surv_object ~ grouped_md_score, data = km_nocovid_final)

ggs <- ggsurvplot(
  fit1,
  data = km_nocovid_final,
  size = 1,
  palette = c("#C45258", "#2F4858"),
  xlab = "Days from Admission",
  conf.int = TRUE,
  pval = TRUE,
  risk.table = "abs_pct",
  legend = "bottom",
  risk.table.height = 0.4,
  risk.table.y.text.col = TRUE,
  tables.y.text = FALSE,
  risk.table.fontsize = 2.8,
  pval.size = 3.5,
  ggtheme = theme_test() + theme(
    panel.grid.major = el(linewidth = 0.5, color = "gray90"),
    axis.text.y = et(color = "black", size = 10),
    axis.title.y = et(color = "black")
  ),
  legend.labs = c("High MD Score", "Low MD Score")
)

# Change table axis labels
ggs$table <-
  ggs$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table

ggs

pdf(
  "./Results/kaplan_meier_roc_loop_30_Day_Mortality_train.pdf",
  height = 6,
  width = 8,
  onefile = FALSE
)
ggs
invisible(dev.off())

# RMST
rmst2(
      time = km_nocovid_final$surv_days,
      status = km_nocovid_final$thirtyday_mortality_overall_class,
      arm = km_nocovid_final$grouped_md_score_class,
      tau = 30
    )
## 
## The truncation time: tau = 30  was specified. 
## 
## Restricted Mean Survival Time (RMST) by arm 
##                Est.    se lower .95 upper .95
## RMST (arm=1) 27.865 0.596    26.697    29.034
## RMST (arm=0) 15.581 1.562    12.520    18.643
## 
## 
## Restricted Mean Time Lost (RMTL) by arm 
##                Est.    se lower .95 upper .95
## RMTL (arm=1)  2.135 0.596     0.966     3.303
## RMTL (arm=0) 14.419 1.562    11.357    17.480
## 
## 
## Between-group contrast 
##                        Est. lower .95 upper .95 p
## RMST (arm=1)-(arm=0) 12.284     9.007    15.561 0
## RMST (arm=1)/(arm=0)  1.788     1.463     2.186 0
## RMTL (arm=1)/(arm=0)  0.148     0.082     0.266 0
# Boxplot of MD Score
mds_chis <-
  stats::chisq.test(
    km_nocovid_final$thirtyday_mortality_overall,
    km_nocovid_final$md_score
  )

md_violin <-
  ggviolin(
    km_nocovid_final,
    x = "thirtyday_mortality_overall",
    y = "md_score",
    fill = "thirtyday_mortality_overall",
    palette = "lancet",
    add = c("dotplot"),
    add.params = list(binwidth = 0.05)
  ) +
  annotate(
    "text",
    x = 1.5,
    y = 16,
    label = paste0(
      "Chisq",
      "(",
      round(mds_chis$statistic, 3),
      "),",
      " p =",
      scales::scientific(mds_chis$p.value)
    )
  ) +
  annotate(
    "segment", # horizontal line between the two violins
    x = 1,
    xend = 2,
    y = 15.5,
    yend = 15.5
  ) +
  annotate(
    "segment", # vertical segment connecting to horizontal line above survivor group
    x = 1,
    xend = 1,
    y = 15.5,
    yend = 15.2
  ) +
  annotate(
    "segment",
    x = 2,
    xend = 2,
    y = 15.5,
    yend = 15.2
  ) +
  ylab("Metabolic Dysbiosis Score\n") +
  xlab("") +
  guides(fill = guide_legend("30 Day Mortality"))

md_violin

ggsave(
  plot = md_violin,
  filename = "./Results/MDS_Violin_train.pdf",
  height = 6,
  width = 8
)

# gg_mds_chi <- gginference::ggchisqtest(mds_chis, colaccept = "green3", colreject = "red3") # It is highly unlikely that our test statistic would be observed if there were no association between survival outcome and the md score
# gg_mds_chi

# Confusion matrix for 14 compounds
km_nocovid_final2 <- km_nocovid_final %>% 
  mutate(prediction = ifelse(grouped_md_score == "Low Score", "Survivor", "Non-Survivor"))

caret::confusionMatrix(table(
  factor(
    km_nocovid_final2$prediction,
    levels = c("Survivor", "Non-Survivor")
  ),
  factor(
    km_nocovid_final2$thirtyday_mortality_overall,
    levels = c("Survivor", "Non-Survivor")
  )
))
## Confusion Matrix and Statistics
## 
##               
##                Survivor Non-Survivor
##   Survivor           91           13
##   Non-Survivor       11           32
##                                           
##                Accuracy : 0.8367          
##                  95% CI : (0.7669, 0.8925)
##     No Information Rate : 0.6939          
##     P-Value [Acc > NIR] : 5.481e-05       
##                                           
##                   Kappa : 0.6109          
##                                           
##  Mcnemar's Test P-Value : 0.8383          
##                                           
##             Sensitivity : 0.8922          
##             Specificity : 0.7111          
##          Pos Pred Value : 0.8750          
##          Neg Pred Value : 0.7442          
##              Prevalence : 0.6939          
##          Detection Rate : 0.6190          
##    Detection Prevalence : 0.7075          
##       Balanced Accuracy : 0.8016          
##                                           
##        'Positive' Class : Survivor        
## 
# Confusion Matrix and Statistics
# 
#               
#                Survivor Non-Survivor
#   Survivor           86           11
#   Non-Survivor       16           34
#                                           
#                Accuracy : 0.8163          
#                  95% CI : (0.7441, 0.8753)

Cutpoint Analysis: Shannon Diversity

# Cutpoint dataframe
cutpoints_df_shannon <- alpha_shannon %>%
  left_join(
    micu_new_nocovid_oc %>%
      ungroup() %>%
      select(unique_id, shotgunSeq_id, thirtyday_mortality_overall) %>%
      distinct(shotgunSeq_id, .keep_all = TRUE),
    by = "shotgunSeq_id"
  ) %>%
  mutate(
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
  )

# Optimal cutpoints
set.seed(123456)
cutpoints_shannon <-
  cutpoints_df_shannon %>%
  group_map(
    ~ safe_cutpointr(
      .,
      Shannon,
      thirtyday_mortality_overall_class,
      method = maximize_metric,
      metric = youden,
      pos_class = 0,
      neg_class = 1,
      boot_runs = 100,
      use_midpoints = TRUE,
      na.rm = T
    ),
    .keep = TRUE
  )

cutpoints_unnest_shannon <- cutpoints_shannon %>%
  map_df(as_tibble)

# Summary table
cutpoints_unnest_summary_shannon <-
  cutpoints_unnest_shannon %>%
  group_by(pos_class, optimal_cutpoint) %>%
  summarize(top_auc = max(AUC)) %>%
  filter(top_auc == max(top_auc)) %>%
  arrange(-top_auc)

# Plot ROC curves
cutpoints_unnest_shannon %>%
  arrange(desc(AUC)) %>%
  group_by(pos_class) %>%
  ungroup() %>%
  unnest(roc_curve) %>%
  arrange(desc(AUC)) %>%
  mutate(auc_label = paste0("AUC = ", round(AUC, 5))) %>%
  ggplot(aes(x = fpr, y = tpr)) +
  geom_line() +
  geom_text(aes(label = auc_label, x = 0.6, y = 0.2)) +
  geom_text(aes(
    label = round(optimal_cutpoint, 3),
    y = 0.8,
    x = 0.2
  )) +
  facet_wrap(~pos_class)

cutpoints_results_var_slct_shannon <- cutpoints_df_shannon %>%
  mutate(
    shannon_class = ifelse(
      Shannon >= cutpoints_unnest_summary_shannon$optimal_cutpoint,
      paste0(
        "High Diversity (Shannon > ",
        round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
        ")"
      ),
      paste0(
        "Low Diversity (Shannon < ",
        round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
        ")"
      )
    ),
    shannon_binary = ifelse(
      Shannon >= cutpoints_unnest_summary_shannon$optimal_cutpoint,
      1,
      0
    )
  )

# ROC curve for Shannon Diversity using training data
pROC_obj_shannon <- pROC::roc(
  cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class,
  cutpoints_results_var_slct_shannon$Shannon,
  smoothed = TRUE,
  ci = TRUE,
  plot = FALSE,
  auc.polygon = TRUE,
  best.method = TRUE,
  print.auc = TRUE,
  print.auc.col = "black",
  col = "#2F472F",
  auc.polygon.border = "black",
  auc.polygon.col = "gray65",
  print.thres.best.method = "youden"
)

coordinates_shannon <-
  coords(pROC_obj_shannon,
    "best",
    ret = c("acc", "threshold", "sens", "spec", "ppv", "npv")
  )

pROC::roc(
  cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class,
  cutpoints_results_var_slct_shannon$Shannon,
  smoothed = TRUE,
  ci = TRUE,
  plot = TRUE,
  auc.polygon = TRUE,
  best.method = TRUE,
  print.auc = TRUE,
  print.auc.col = "black",
  col = "#2F472F",
  auc.polygon.border = "black",
  auc.polygon.col = "gray65",
  print.thres.best.method = "youden"
)
## 
## Call:
## roc.default(response = cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class,     predictor = cutpoints_results_var_slct_shannon$Shannon, ci = TRUE,     plot = TRUE, smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE,     print.auc = TRUE, print.auc.col = "black", col = "#2F472F",     auc.polygon.border = "black", auc.polygon.col = "gray65",     print.thres.best.method = "youden")
## 
## Data: cutpoints_results_var_slct_shannon$Shannon in 102 controls (cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class 0) > 45 cases (cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class 1).
## Area under the curve: 0.5858
## 95% CI: 0.4856-0.6861 (DeLong)
text(
  paste("ACC:", round(coordinates_shannon$accuracy, 3) * 100, "%"),
  x = 0.5,
  y = 0.45,
  adj = 0
)
text(paste("PPV:", round(coordinates_shannon$ppv, 2)),
  x = 0.5,
  y = 0.41,
  adj = 0
)
text(paste("NPV:", round(coordinates_shannon$npv, 2)),
  x = 0.5,
  y = 0.37,
  adj = 0
)
text(paste("Threshold:", round(coordinates_shannon$threshold, 2)),
  x = 0.5,
  y = 0.33,
  adj = 0
)

roc_plot_shannon <- grDevices::recordPlot()

cairo_pdf(
  "./Results/ROC_curve_cutpoint_shannon_30_Day_Mortality_train.pdf",
  width = 8,
  height = 6
)
pROC::roc(
  cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class,
  cutpoints_results_var_slct_shannon$Shannon,
  smoothed = TRUE,
  ci = TRUE,
  plot = TRUE,
  auc.polygon = TRUE,
  best.method = TRUE,
  print.auc = TRUE,
  print.auc.col = "black",
  col = "#2F472F",
  auc.polygon.border = "black",
  auc.polygon.col = "gray65",
  print.thres.best.method = "youden"
)
## 
## Call:
## roc.default(response = cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class,     predictor = cutpoints_results_var_slct_shannon$Shannon, ci = TRUE,     plot = TRUE, smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE,     print.auc = TRUE, print.auc.col = "black", col = "#2F472F",     auc.polygon.border = "black", auc.polygon.col = "gray65",     print.thres.best.method = "youden")
## 
## Data: cutpoints_results_var_slct_shannon$Shannon in 102 controls (cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class 0) > 45 cases (cutpoints_results_var_slct_shannon$thirtyday_mortality_overall_class 1).
## Area under the curve: 0.5858
## 95% CI: 0.4856-0.6861 (DeLong)
text(
  paste("ACC:", round(coordinates_shannon$accuracy, 3) * 100, "%"),
  x = 0.5,
  y = 0.45,
  adj = 0
)
text(paste("PPV:", round(coordinates_shannon$ppv, 2)),
  x = 0.5,
  y = 0.41,
  adj = 0
)
text(paste("NPV:", round(coordinates_shannon$npv, 2)),
  x = 0.5,
  y = 0.37,
  adj = 0
)
text(paste("Threshold:", round(coordinates_shannon$threshold, 2)),
  x = 0.5,
  y = 0.33,
  adj = 0
)
invisible(dev.off())

model_comps_df <- micu_nocovid_first_samps_omics_light %>%
  filter(metabolomicsID %in% micu_new_nocovid_oc$metabolomicsID) %>%
  group_by(unique_id, shotgunSeq_id, taxid) %>%
  dplyr::slice(1) %>%
  left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
  mutate(Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "|")) %>%
  filter(grepl(pattern = "Enterococcus|Enterobacterales", x = Species)) %>%
  mutate(organism = case_when(
    grepl(pattern = "Enterococcus", x = Species) ~ "Enterococcus",
    grepl(pattern = "Enterobacterales", x = Species) ~ "Enterobacterales",
    TRUE ~ NA
  )) %>%
  drop_na(organism) %>%
  select(unique_id, thirtyday_mortality_overall, organism, pctseqs) %>%
  group_by(unique_id, thirtyday_mortality_overall, organism) %>%
  summarise(pctseqs = sum(pctseqs)) %>%
  pivot_wider(names_from = "organism", values_from = "pctseqs") %>% 
  left_join(micu_new_nocovid_oc %>% select(unique_id, shotgunSeq_id, metabolomicsID)) %>%
  select(unique_id, shotgunSeq_id, metabolomicsID, Enterococcus, Enterobacterales) %>%
  left_join(alpha_shannon) %>%
  left_join(tableone_nocovid_df_filt %>% select(unique_id, thirtyday_mortality_overall)) %>%
  left_join(km_nocovid_final)

# MD Score
pROC_obj_mds <- pROC::roc(
  model_comps_df$thirtyday_mortality_overall,
  model_comps_df$md_score,
  smoothed = TRUE,
  ci = TRUE,
  plot = FALSE,
  auc.polygon = TRUE,
  best.method = TRUE,
  print.auc = TRUE,
  print.auc.col = "black",
  col = "#2F472F",
  auc.polygon.border = "black",
  auc.polygon.col = "gray65",
  print.thres.best.method = "youden"
)

coordinates_mds <-
  coords(pROC_obj_mds,
    "best",
    ret = c("acc", "threshold", "sens", "spec", "ppv", "npv")
  )

pROC::roc(
  model_comps_df$thirtyday_mortality_overall,
  model_comps_df$md_score,
  smoothed = TRUE,
  ci = TRUE,
  plot = TRUE,
  auc.polygon = TRUE,
  best.method = TRUE,
  print.auc = TRUE,
  print.auc.col = "black",
  col = "#2F472F",
  auc.polygon.border = "black",
  auc.polygon.col = "gray65",
  print.thres.best.method = "youden"
)
## 
## Call:
## roc.default(response = model_comps_df$thirtyday_mortality_overall,     predictor = model_comps_df$md_score, ci = TRUE, plot = TRUE,     smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE,     print.auc = TRUE, print.auc.col = "black", col = "#2F472F",     auc.polygon.border = "black", auc.polygon.col = "gray65",     print.thres.best.method = "youden")
## 
## Data: model_comps_df$md_score in 102 controls (model_comps_df$thirtyday_mortality_overall Survivor) < 45 cases (model_comps_df$thirtyday_mortality_overall Non-Survivor).
## Area under the curve: 0.8563
## 95% CI: 0.7849-0.9277 (DeLong)
text(
  paste("ACC:", round(coordinates_mds$accuracy, 3) * 100, "%"),
  x = 0.5,
  y = 0.45,
  adj = 0
)
text(paste("PPV:", round(coordinates_mds$ppv, 2)),
  x = 0.5,
  y = 0.41,
  adj = 0
)
text(paste("NPV:", round(coordinates_mds$npv, 2)),
  x = 0.5,
  y = 0.37,
  adj = 0
)
text(paste("Threshold:", round(coordinates_mds$threshold, 2)),
  x = 0.5,
  y = 0.33,
  adj = 0
)

# MD Score Model Metrics
coordinates_mds_df <- model_comps_df %>%
  bind_cols(coordinates_mds$threshold[1]) %>% 
  dplyr::rename(threshold = ...19) %>%
  mutate(prediction = ifelse(md_score > threshold, "Non-Survivor", "Survivor"))

caret::confusionMatrix(table(
  factor(
    coordinates_mds_df$prediction,
    levels = c("Survivor", "Non-Survivor")
  ),
  factor(
    coordinates_mds_df$thirtyday_mortality_overall,
    levels = c("Survivor", "Non-Survivor")
  )
))
## Confusion Matrix and Statistics
## 
##               
##                Survivor Non-Survivor
##   Survivor           91           13
##   Non-Survivor       11           32
##                                           
##                Accuracy : 0.8367          
##                  95% CI : (0.7669, 0.8925)
##     No Information Rate : 0.6939          
##     P-Value [Acc > NIR] : 5.481e-05       
##                                           
##                   Kappa : 0.6109          
##                                           
##  Mcnemar's Test P-Value : 0.8383          
##                                           
##             Sensitivity : 0.8922          
##             Specificity : 0.7111          
##          Pos Pred Value : 0.8750          
##          Neg Pred Value : 0.7442          
##              Prevalence : 0.6939          
##          Detection Rate : 0.6190          
##    Detection Prevalence : 0.7075          
##       Balanced Accuracy : 0.8016          
##                                           
##        'Positive' Class : Survivor        
## 
# Confusion Matrix and Statistics
# 
#               
#                Survivor Non-Survivor
#   Survivor           86           11
#   Non-Survivor       16           34
#                                           
#                Accuracy : 0.8163          
#                  95% CI : (0.7441, 0.8753)

# Build df of MDS and Shannon
mds_shannon <- micu_new_nocovid_oc %>%
  select(shotgunSeq_id, metabolomicsID) %>%
  right_join(km_nocovid_final %>% select(metabolomicsID, thirtyday_mortality_overall, md_score)) %>%
  left_join(cutpoints_results_var_slct_shannon) %>%
  select(Shannon, md_score) %>%
  mutate(
    outlier = ifelse(
      Shannon < coordinates_shannon$threshold &
        md_score < coordinates_mds$threshold |
        Shannon > coordinates_shannon$threshold &
          md_score > coordinates_mds$threshold,
      "Outlier",
      "Not Outlier"
    )
  )

# Correlation plot of MDS with Shannon
gg_mds_shannon_scatter <-
  ggscatter(
    mds_shannon,
    y = "Shannon",
    x = "md_score",
    size = 3,
    color = "outlier",
    alpha = 0.2,
    palette = "lancet",
    add = "reg.line",
    add.params = list(color = "black"),
    conf.int = TRUE
  ) +
  stat_cor(method = "spearman") +
  geom_vline(xintercept = coordinates_mds$threshold, linetype = "longdash") +
  geom_hline(yintercept = coordinates_shannon$threshold, linetype = "longdash") +
  ylab("\U03B1-Diversity\n") +
  xlab("\nMD Score") +
  guides(color = guide_legend("Outlier"))

gg_mds_shannon_scatter

cairo_pdf(
  "./Results/MDS_Shannon_Correlation_train.pdf",
  height = 6,
  width = 8
)
gg_mds_shannon_scatter
invisible(dev.off())

ROC and Correlation Analyses: Enterococcus/Enterobacterales Relative Abundance and MDS

# Enterococcus Relative Abundance
pROC_obj_ecoc <- pROC::roc(
  model_comps_df$thirtyday_mortality_overall,
  model_comps_df$Enterococcus,
  smoothed = TRUE,
  ci = TRUE,
  plot = FALSE,
  auc.polygon = TRUE,
  best.method = TRUE,
  print.auc = TRUE,
  print.auc.col = "black",
  col = "#2F472F",
  auc.polygon.border = "black",
  auc.polygon.col = "gray65",
  print.thres.best.method = "youden"
)

coordinates_ecoc <-
  pROC::coords(pROC_obj_ecoc,
    "best",
    ret = c("acc", "threshold", "sens", "spec", "ppv", "npv")
  )

pROC::roc(
  model_comps_df$thirtyday_mortality_overall,
  model_comps_df$Enterococcus,
  smoothed = TRUE,
  ci = TRUE,
  plot = TRUE,
  auc.polygon = TRUE,
  best.method = TRUE,
  print.auc = TRUE,
  print.auc.col = "black",
  col = "#2F472F",
  auc.polygon.border = "black",
  auc.polygon.col = "gray65",
  print.thres.best.method = "youden"
)
## 
## Call:
## roc.default(response = model_comps_df$thirtyday_mortality_overall,     predictor = model_comps_df$Enterococcus, ci = TRUE, plot = TRUE,     smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE,     print.auc = TRUE, print.auc.col = "black", col = "#2F472F",     auc.polygon.border = "black", auc.polygon.col = "gray65",     print.thres.best.method = "youden")
## 
## Data: model_comps_df$Enterococcus in 102 controls (model_comps_df$thirtyday_mortality_overall Survivor) < 45 cases (model_comps_df$thirtyday_mortality_overall Non-Survivor).
## Area under the curve: 0.6842
## 95% CI: 0.5953-0.7731 (DeLong)
text(
  paste("ACC:", round(coordinates_ecoc$accuracy, 3) * 100, "%"),
  x = 0.5,
  y = 0.45,
  adj = 0
)
text(paste("PPV:", round(coordinates_ecoc$ppv, 2)),
  x = 0.5,
  y = 0.41,
  adj = 0
)
text(paste("NPV:", round(coordinates_ecoc$npv, 2)),
  x = 0.5,
  y = 0.37,
  adj = 0
)
text(paste("Threshold:", round(coordinates_ecoc$threshold, 2)),
  x = 0.5,
  y = 0.33,
  adj = 0
)

# Enterococcus Relative Abundance Model Metrics
coordinates_mmp_ecoc <- model_comps_df %>%
  bind_cols(coordinates_ecoc$threshold) %>%
  dplyr::rename(threshold = ...19) %>%
  mutate(prediction = ifelse(Enterococcus >= threshold, "Non-Survivor", "Survivor"))

caret::confusionMatrix(table(
  factor(
    coordinates_mmp_ecoc$prediction,
    levels = c("Survivor", "Non-Survivor")
  ),
  factor(
    coordinates_mmp_ecoc$thirtyday_mortality_overall,
    levels = c("Survivor", "Non-Survivor")
  )
))
## Confusion Matrix and Statistics
## 
##               
##                Survivor Non-Survivor
##   Survivor           52            6
##   Non-Survivor       50           39
##                                           
##                Accuracy : 0.619           
##                  95% CI : (0.5354, 0.6978)
##     No Information Rate : 0.6939          
##     P-Value [Acc > NIR] : 0.9786          
##                                           
##                   Kappa : 0.2957          
##                                           
##  Mcnemar's Test P-Value : 9.132e-09       
##                                           
##             Sensitivity : 0.5098          
##             Specificity : 0.8667          
##          Pos Pred Value : 0.8966          
##          Neg Pred Value : 0.4382          
##              Prevalence : 0.6939          
##          Detection Rate : 0.3537          
##    Detection Prevalence : 0.3946          
##       Balanced Accuracy : 0.6882          
##                                           
##        'Positive' Class : Survivor        
## 
# Build df of Shannon and Enterococcus
mds_ecoc <- model_comps_df %>%
  select(md_score, Enterococcus) %>%
  mutate(
    outlier = ifelse(
      md_score < coordinates_mds$threshold & Enterococcus >= 0.199,
      "Outlier",
      "Not Outlier"
    )
  )

# Correlation plot of MDS with Enterococcus
gg_mds_ecoc_scatter <-
  ggscatter(
    mds_ecoc,
    y = "Enterococcus",
    x = "md_score",
    size = 3,
    color = "outlier",
    alpha = 0.2,
    palette = "lancet",
    add = "reg.line",
    add.params = list(color = "black"),
    conf.int = TRUE
  ) +
  stat_cor(method = "spearman") +
  geom_hline(yintercept = 0.30, linetype = "longdash") +
  geom_vline(xintercept = coordinates_mds$threshold, linetype = "longdash") +
  xlab("\nMD Score") +
  ylab("Enterococcus Relative Abundance (%)\n") +
  guides(color = guide_legend("Outlier"))

gg_mds_ecoc_scatter

cairo_pdf(
  "./Results/MDS_Enterococcus_Correlation_train.pdf",
  height = 6,
  width = 8
)
gg_mds_ecoc_scatter
invisible(dev.off())

# Number of Ecoc Expansions/High MDS and Ecoc Expansions/Low MDS
model_comps_df %>%
  select(md_score, Enterococcus) %>%
  mutate(
    measure = case_when(
      Enterococcus >= 0.199 &
        md_score < coordinates_mds$threshold ~ "Expan_LMDS",
      Enterococcus >= 0.199 &
        md_score >= coordinates_mds$threshold ~ "Expan_HMDS",
      Enterococcus < 0.199 &
        md_score < coordinates_mds$threshold ~ "NoExpan_LMDS",
      Enterococcus < 0.199 &
        md_score >= coordinates_mds$threshold ~ "NoExpan_HMDS"
    )
  ) %>%
  group_by(measure) %>%
  tally() %>%
  dplyr::rename(count = n) %>%
  mutate(
    total = sum(count),
    percent = (count / total) * 100
  ) %>%
  write.csv(.,
    "./Results/Enterococcus_Expansion_MD_Score_train.csv",
    row.names = FALSE
  )

# Enterobacterales Relative Abundance
pROC_obj_ebac <- pROC::roc(
  model_comps_df$thirtyday_mortality_overall,
  model_comps_df$Enterobacterales,
  smoothed = TRUE,
  ci = TRUE,
  plot = FALSE,
  auc.polygon = TRUE,
  best.method = TRUE,
  print.auc = TRUE,
  print.auc.col = "black",
  col = "#2F472F",
  auc.polygon.border = "black",
  auc.polygon.col = "gray65",
  print.thres.best.method = "youden"
)

coordinates_ebac <-
  pROC::coords(pROC_obj_ebac,
    "best",
    ret = c("acc", "threshold", "sens", "spec", "ppv", "npv")
  )

pROC::roc(
  model_comps_df$thirtyday_mortality_overall,
  model_comps_df$Enterobacterales,
  smoothed = TRUE,
  ci = TRUE,
  plot = TRUE,
  auc.polygon = TRUE,
  best.method = TRUE,
  print.auc = TRUE,
  print.auc.col = "black",
  col = "#2F472F",
  auc.polygon.border = "black",
  auc.polygon.col = "gray65",
  print.thres.best.method = "youden"
)
## 
## Call:
## roc.default(response = model_comps_df$thirtyday_mortality_overall,     predictor = model_comps_df$Enterobacterales, ci = TRUE, plot = TRUE,     smoothed = TRUE, auc.polygon = TRUE, best.method = TRUE,     print.auc = TRUE, print.auc.col = "black", col = "#2F472F",     auc.polygon.border = "black", auc.polygon.col = "gray65",     print.thres.best.method = "youden")
## 
## Data: model_comps_df$Enterobacterales in 102 controls (model_comps_df$thirtyday_mortality_overall Survivor) < 45 cases (model_comps_df$thirtyday_mortality_overall Non-Survivor).
## Area under the curve: 0.5353
## 95% CI: 0.4291-0.6415 (DeLong)
text(
  paste("ACC:", round(coordinates_ebac$accuracy, 3) * 100, "%"),
  x = 0.5,
  y = 0.45,
  adj = 0
)
text(paste("PPV:", round(coordinates_ebac$ppv, 2)),
  x = 0.5,
  y = 0.41,
  adj = 0
)
text(paste("NPV:", round(coordinates_ebac$npv, 2)),
  x = 0.5,
  y = 0.37,
  adj = 0
)
text(paste("Threshold:", round(coordinates_ebac$threshold, 2)),
  x = 0.5,
  y = 0.33,
  adj = 0
)

# Enterobacterales Relative Abundance Model Metrics
coordinates_mmp_ebacc <- model_comps_df %>%
  bind_cols(coordinates_ebac$threshold) %>%
  dplyr::rename(threshold = ...19) %>%
  mutate(prediction = ifelse(Enterobacterales >= threshold, "Non-Survivor", "Survivor"))

caret::confusionMatrix(table(
  factor(
    coordinates_mmp_ecoc$prediction,
    levels = c("Survivor", "Non-Survivor")
  ),
  factor(
    coordinates_mmp_ecoc$thirtyday_mortality_overall,
    levels = c("Survivor", "Non-Survivor")
  )
))
## Confusion Matrix and Statistics
## 
##               
##                Survivor Non-Survivor
##   Survivor           52            6
##   Non-Survivor       50           39
##                                           
##                Accuracy : 0.619           
##                  95% CI : (0.5354, 0.6978)
##     No Information Rate : 0.6939          
##     P-Value [Acc > NIR] : 0.9786          
##                                           
##                   Kappa : 0.2957          
##                                           
##  Mcnemar's Test P-Value : 9.132e-09       
##                                           
##             Sensitivity : 0.5098          
##             Specificity : 0.8667          
##          Pos Pred Value : 0.8966          
##          Neg Pred Value : 0.4382          
##              Prevalence : 0.6939          
##          Detection Rate : 0.3537          
##    Detection Prevalence : 0.3946          
##       Balanced Accuracy : 0.6882          
##                                           
##        'Positive' Class : Survivor        
## 
# Build df of Shannon and Enterobacterales
mds_ebac <- model_comps_df %>%
  select(md_score, Enterobacterales) %>%
  mutate(
    outlier = ifelse(
      md_score < coordinates_mds$threshold & Enterobacterales >= 0.025,
      "Outlier",
      "Not Outlier"
    )
  )

# Correlation plot of MDS with Enterobacterales
gg_mds_ebac_scatter <-
  ggscatter(
    mds_ebac,
    y = "Enterobacterales",
    x = "md_score",
    size = 3,
    color = "outlier",
    alpha = 0.2,
    palette = "lancet",
    add = "reg.line",
    add.params = list(color = "black"),
    conf.int = TRUE
  ) +
  stat_cor(method = "spearman") +
  geom_hline(yintercept = 0.30, linetype = "longdash") +
  geom_vline(xintercept = coordinates_mds$threshold, linetype = "longdash") +
  xlab("\nMD Score") +
  ylab("Enterobacterales Relative Abundance (%)\n") +
  guides(color = guide_legend("Outlier"))

gg_mds_ebac_scatter

cairo_pdf(
  "./Results/MDS_Enterobacterales_Correlation_train.pdf",
  height = 6,
  width = 8
)
gg_mds_ebac_scatter
invisible(dev.off())

# Number of Ebac Expansions/High MDS and Ebac Expansions/Low MDS
model_comps_df %>%
  select(md_score, Enterobacterales) %>%
  mutate(
    measure = case_when(
      Enterobacterales >= 0.025 &
        md_score < coordinates_mds$threshold ~ "Expan_LMDS",
      Enterobacterales >= 0.025 &
        md_score >= coordinates_mds$threshold ~ "Expan_HMDS",
      Enterobacterales < 0.025 &
        md_score < coordinates_mds$threshold ~ "NoExpan_LMDS",
      Enterobacterales < 0.025 &
        md_score >= coordinates_mds$threshold ~ "NoExpan_HMDS"
    )
  ) %>%
  group_by(measure) %>%
  tally() %>%
  dplyr::rename(count = n) %>%
  mutate(
    total = sum(count),
    percent = (count / total) * 100
  ) %>%
  write.csv(.,
    "./Results/Enterobacterales_Expansion_MD_Score_train.csv",
    row.names = FALSE
  )

Kaplan-Meier Survival Analyses: MDS (5.5), Enterococcus (19.9%), Enterobacterales (2.5%), Shannon (2.16)

km_nocovid <- micu_new_nocovid_oc %>%
  select(
    unique_id,
    sampleid,
    metabolomicsID,
    days_until_death_overall,
    censoring_thirtyday_mortality_overall,
    thirtyday_mortality_overall
  ) %>%
  ungroup() %>%
  mutate(
    surv_days = ifelse(
      is.na(days_until_death_overall) &
        thirtyday_mortality_overall == "Survivor",
      censoring_thirtyday_mortality_overall,
      days_until_death_overall
    ),
    surv_days = ifelse(
      is.na(surv_days) &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    surv_days = ifelse(
      surv_days > 30 &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
  ) %>% # Non-Survivor is 1
  left_join(km_nocovid_final %>% select(metabolomicsID, thirtyday_mortality_overall, grouped_md_score)) %>%
  left_join(model_comps_df %>%
      mutate(enterococcus_domination_threshold = ifelse(Enterococcus >= 0.199, 1, 0),
             enterobacterales_domination_threshold = ifelse(Enterobacterales >= 0.025, 1, 0)) %>%
      select(unique_id, enterococcus_domination_threshold, enterobacterales_domination_threshold)
  ) %>%
  left_join(
    alpha_shannon %>%
      left_join(
        micu_new_nocovid_oc %>%
          ungroup() %>%
          select(unique_id, shotgunSeq_id) %>%
          distinct(shotgunSeq_id, .keep_all = TRUE),
        by = "shotgunSeq_id"
      )
  ) %>%
  mutate(
    shannon_class = ifelse(
      Shannon >= coords(
        pROC_obj_shannon,
        "best",
        ret = c("threshold", "sens", "spec", "ppv", "npv")
      )[1][[1]],
      paste0(
        "High Diversity (Shannon > ",
        round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
        ")"
      ),
      paste0(
        "Low Diversity (Shannon < ",
        round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
        ")"
      )
    ),
    enterococcus_domination_threshold = ifelse(
      enterococcus_domination_threshold == 1,
      "Enterococcous Domination",
      "No Enterococcus Domination"
    ),
    enterococcus_domination_threshold = factor(
      enterococcus_domination_threshold,
      levels = c("Enterococcous Domination", "No Enterococcus Domination")
    ),
    enterobacterales_domination_threshold = ifelse(
      enterobacterales_domination_threshold == 1,
      "Enterobacterales Domination",
      "No Enterobacterales Domination"
    ),
    enterobacterales_domination_threshold = factor(
      enterobacterales_domination_threshold,
      levels = c("Enterobacterales Domination", "No Enterobacterales Domination")
    )
  )

# KM Curves: MD Score
set.seed(123)
surv_object <-
  Surv(
    time = km_nocovid$surv_days,
    event = km_nocovid$thirtyday_mortality_overall_class
  )

fit1 <- survfit(surv_object ~ grouped_md_score, data = km_nocovid)

ggs <- ggsurvplot(
  fit1,
  data = km_nocovid,
  size = 1,
  palette = c("#C45258", "#2F4858"),
  xlab = "Days from Admission",
  conf.int = TRUE,
  pval = TRUE,
  risk.table = "abs_pct",
  legend = "bottom",
  risk.table.height = 0.4,
  risk.table.y.text.col = TRUE,
  tables.y.text = FALSE,
  risk.table.fontsize = 2.8,
  pval.size = 3.5,
  ggtheme = theme_test() + theme(
    panel.grid.major = el(linewidth = 0.5, color = "gray90"),
    axis.text.y = et(color = "black", size = 10),
    axis.title.y = et(color = "black")
  ),
  legend.labs = c("High MD Score", "Low MD Score")
)

# Change table axis labels
ggs$table <-
  ggs$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table

ggs

pdf(
  "./Results/kaplan_meier_roc_loop_30_Day_Mortality_train.pdf",
  height = 4,
  width = 6,
  onefile = FALSE
)
ggs
invisible(dev.off())

# Restricted Mean Survival Time
rmst_mds <-
survRM2::rmst2(
  km_nocovid$surv_days,
  km_nocovid$thirtyday_mortality_overall_class,
  factor(
    km_nocovid$grouped_md_score,
    levels = c("Low Score", "High Score"),
    labels = c(1, 0)                      # Low Score = 1, High Score = 0, due to area = Low Score - High Score
  ),
  tau = 30
)

plot(rmst_mds, xlab = "Days", ylab = "Survival Probability")

# KM Curves: Enterococcus Domination (>= 0.199) #coordinates_ecoc$threshold)
set.seed(123)
surv_object2 <-
  Surv(
    time = km_nocovid$surv_days,
    event = km_nocovid$thirtyday_mortality_overall_class
  )

fit2 <-
  survfit(surv_object2 ~ enterococcus_domination_threshold, data = km_nocovid)

ggs_ecoc <- ggsurvplot(
  fit2,
  data = km_nocovid,
  size = 1,
  palette = c("#C4335F", "#047D6B"),
  xlab = "Days from Admission",
  conf.int = TRUE,
  pval = TRUE,
  risk.table = "abs_pct",
  legend = "bottom",
  risk.table.height = 0.4,
  risk.table.y.text.col = TRUE,
  tables.y.text = FALSE,
  risk.table.fontsize = 2.8,
  pval.size = 3.5,
  ggtheme = theme_test() + theme(
    panel.grid.major = el(linewidth = 0.5, color = "gray90"),
    axis.text.y = et(color = "black", size = 10),
    axis.title.y = et(color = "black")
  ),
  surv.median.line = "hv",
  legend.labs = c(
    paste0("Enterococcus Domination (>", round(0.199
    * 100, 2), "%)"),
    "No Domination"
  )
)

# Change table axis labels
ggs_ecoc$table <-
  ggs_ecoc$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table

ggs_ecoc

pdf(
  "./Results/kaplan_meier_enterococcus_30_Day_Mortality_train.pdf",
  height = 4,
  width = 6,
  onefile = FALSE
)
ggs_ecoc
invisible(dev.off())

# KM Curves: Enterobacterales Domination (>= 0.025) #coordinates_ebac$threshold)
set.seed(123)
surv_object3 <-
  Surv(
    time = km_nocovid$surv_days,
    event = km_nocovid$thirtyday_mortality_overall_class
  )

fit3 <-
  survfit(surv_object2 ~ enterobacterales_domination_threshold, data = km_nocovid)

ggs_ebac <- ggsurvplot(
  fit3,
  data = km_nocovid,
  size = 1,
  palette = c("#C4335F", "#047D6B"),
  xlab = "Days from Admission",
  conf.int = TRUE,
  pval = TRUE,
  risk.table = "abs_pct",
  legend = "bottom",
  risk.table.height = 0.4,
  risk.table.y.text.col = TRUE,
  tables.y.text = FALSE,
  risk.table.fontsize = 2.8,
  pval.size = 3.5,
  ggtheme = theme_test() + theme(
    panel.grid.major = el(linewidth = 0.5, color = "gray90"),
    axis.text.y = et(color = "black", size = 10),
    axis.title.y = et(color = "black")
  ),
  surv.median.line = "hv",
  legend.labs = c(
    paste0("Enterobacterales Domination (>", round(0.025
    * 100, 2), "%)"),
    "No Domination"
  )
)

# Change table axis labels
ggs_ebac$table <-
  ggs_ebac$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table

ggs_ebac

pdf(
  "./Results/kaplan_meier_enterobacterales_30_Day_Mortality_train.pdf",
  height = 4,
  width = 6,
  onefile = FALSE
)
ggs_ebac
invisible(dev.off())

# KM Curves: Shannon Diversity (>cutpoints_unnest_summary_shannon$optimal_cutpoint)
set.seed(123)
surv_object4 <-
  Surv(
    time = km_nocovid$surv_days,
    event = km_nocovid$thirtyday_mortality_overall_class
  )

fit4 <- survfit(surv_object4 ~ shannon_class, data = km_nocovid)

ggs_shannon <- ggsurvplot(
  fit4,
  data = km_nocovid,
  size = 1,
  palette = c("#C4335F", "#047D6B"),
  xlab = "Days from Admission",
  conf.int = TRUE,
  pval = TRUE,
  risk.table = "abs_pct",
  legend = "bottom",
  risk.table.height = 0.4,
  risk.table.y.text.col = TRUE,
  tables.y.text = FALSE,
  risk.table.fontsize = 2.8,
  pval.size = 3.5,
  ggtheme = theme_test() + theme(
    panel.grid.major = el(linewidth = 0.5, color = "gray90"),
    axis.text.y = et(color = "black", size = 10),
    axis.title.y = et(color = "black")
  ),
  surv.median.line = "hv",
  legend.labs = c(
    paste0("High Diversity (Shannon >", round(
      coords(
        pROC_obj_shannon,
        "best",
        ret = c("threshold", "sens", "spec", "ppv", "npv")
      )[1][[1]], 2
    ), ")"),
    paste0("Low Diversity (Shannon <=", round(
      coords(
        pROC_obj_shannon,
        "best",
        ret = c("threshold", "sens", "spec", "ppv", "npv")
      )[1][[1]], 2
    ), ")")
  )
)

# Change table axis labels
ggs_shannon$table <-
  ggs_shannon$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table

ggs_shannon

pdf(
  "./Results/kaplan_meier_shannon_30_Day_Mortality_train.pdf",
  height = 4,
  width = 6,
  onefile = FALSE
)
ggs_shannon
invisible(dev.off())

Cox Proportional Hazards Regression Analyses: MDS, Enterococcus, Enterobacterales, Shannon

# Variables labels
cox_df <- tableone_nocovid_df_filt %>%
  labelled::remove_labels() %>%
  janitor::clean_names() %>%
  mutate(
    race_factor = as.character(race_factor),
    race_factor = ifelse(
      race_factor %in% c("Asian", "More than one race", "White, Hispanic"),
      "Other",
      race_factor
    )
  ) %>%
  left_join(
    micu_nocovid_first_samps_omics_light %>%
      group_by(metabolomicsID) %>%
      slice(1) %>%
      select(unique_id, metabolomicsID)
  ) %>%
  left_join(km_nocovid_final %>% select(metabolomicsID, md_score)) %>%
  mutate(grouped_md_score = ifelse(
    md_score >= coordinates_mds$threshold,
    "High Score",
    "Low Score"
  )) %>%
  right_join(
    micu_new_nocovid_oc %>% select(
      unique_id,
      days_until_death_overall,
      censoring_thirtyday_mortality_overall,
      thirtyday_mortality_overall
    )
  ) %>%
  mutate(
    surv_days = ifelse(
      is.na(days_until_death_overall) &
        thirtyday_mortality_overall == "Survivor",
      censoring_thirtyday_mortality_overall,
      days_until_death_overall
    ),
    surv_days = ifelse(
      is.na(surv_days) &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    surv_days = ifelse(
      surv_days > 30 &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
  ) %>%
  group_by(metabolomicsID) %>%
  dplyr::slice(1) %>%
  left_join(
    alpha_shannon %>%
      left_join(
        micu_new_nocovid_oc %>%
          ungroup() %>%
          select(unique_id, shotgunSeq_id) %>%
          distinct(shotgunSeq_id, .keep_all = TRUE),
        by = "shotgunSeq_id"
      )
  ) %>%
  left_join(
    model_comps_df %>%
      mutate(enterococcus_domination_threshold = ifelse(Enterococcus >= 0.199, 1, 0),
             enterobacterales_domination_threshold = ifelse(Enterobacterales >= 0.025, 1, 0)) %>%
      select(unique_id, enterococcus_domination_threshold, enterobacterales_domination_threshold)
  ) %>%
  mutate(
    shannon_class = ifelse(
      Shannon >= coords(
        pROC_obj_shannon,
        "best",
        ret = c("threshold", "sens", "spec", "ppv", "npv")
      )[1][[1]],
      paste0(
        "High Diversity (Shannon > ",
        round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
        ")"
      ),
      paste0(
        "Low Diversity (Shannon < ",
        round(cutpoints_unnest_summary_shannon$optimal_cutpoint, 2),
        ")"
      )
    ),
    enterococcus_domination_threshold = ifelse(
      enterococcus_domination_threshold == 1,
      "Enterococcous Domination",
      "No Enterococcus Domination"
    ),
    enterococcus_domination_threshold = factor(
      enterococcus_domination_threshold,
      levels = c("Enterococcous Domination", "No Enterococcus Domination")
    ),
    enterobacterales_domination_threshold = ifelse(
      enterobacterales_domination_threshold == 1,
      "Enterobacterales Domination",
      "No Enterobacterales Domination"
    ),
    enterobacterales_domination_threshold = factor(
      enterobacterales_domination_threshold,
      levels = c("Enterobacterales Domination", "No Enterobacterales Domination")
    )
  ) %>%
  dplyr::rename(`Charlson Comorbidity Index` = cci_total_sc) %>%
  mutate(diet = ifelse(diet == "1", "Diet", "NPO")) %>% 
  dplyr::rename(
    `Sex` = "sex_factor",
    `Age` = "age",
    `Acute respiratory distress syndrome` = "ards_factor",
    `Sepsis` = "sepsis_factor",
    `SOFA Score` = "sofa_score_total",
    `Race` = "race_factor",
    `Time to stool sample` = "day_collected",
    `Diet` = "diet",
    `MDS` = "md_score",
    `Enterococcus Domination` = "enterococcus_domination_threshold",
    `Enterobacterales Domination` = "enterobacterales_domination_threshold",
    `Shannon Diversity` = "Shannon"
  )


reset_gtsummary_theme()

coxauc <-
  coxph(
    Surv(cox_df$surv_days, cox_df$thirtyday_mortality_overall_class) ~
      `Sex` +
      `Age` +
      `Charlson Comorbidity Index` +
      `Acute respiratory distress syndrome` +
      `Sepsis` +
      `SOFA Score` +
      `Race` +
      `Time to stool sample` +
      `Diet` +
      `MDS`,
    data = cox_df
  ) %>%
  tbl_regression(
    exp = TRUE,
    pvalue_fun = function(x) {
      if_else(is.na(x), NA_character_, if_else(
        x < 0.001,
        format(x,
          digits = 3, scientific = TRUE
        ),
        format(round(x, 3),
          scientific = F
        )
      ))
    }
  ) %>%
  modify_footnote(everything() ~ NA, abbreviation = TRUE)


coxauc %>%
  gtsummary::modify_caption("**Cox Proportional Hazards Regression**")
Cox Proportional Hazards Regression
Characteristic HR 95% CI p-value
Sex
    Female
    Male 1.31 0.64, 2.69 0.463
Age 0.98 0.96, 1.01 0.240
Charlson Comorbidity Index 1.26 1.09, 1.45 0.001
Acute respiratory distress syndrome
    No
    Yes 2.41 1.04, 5.61 0.041
Sepsis
    None
    Sepsis 1.56 0.58, 4.19 0.377
SOFA Score 1.03 0.95, 1.11 0.480
Race
    African American
    Other 1.79 0.45, 7.20 0.409
    White, non-Hispanic 1.71 0.76, 3.84 0.197
Time to stool sample 0.96 0.87, 1.06 0.451
Diet
    Diet
    NPO 0.92 0.38, 2.20 0.853
MDS 1.71 1.43, 2.05 5.46e-09
# In case you get an error: "Error in s$close() : attempt to apply non-function", run this code below:
# f <- chromote::default_chromote_object() #get the f object
# f$close()

gt::gtsave(gtsummary::as_gt(coxauc), file = "./Results/cox_model_SOFA_30_Day_Mortality_roc_loop_train.png")

# Enterococcus Domination
coxauc_ecoc <-
  coxph(
    Surv(cox_df$surv_days, cox_df$thirtyday_mortality_overall_class) ~
      `Sex` +
      `Age` +
      `Charlson Comorbidity Index` +
      `Acute respiratory distress syndrome` +
      `Sepsis` +
      `SOFA Score` +
      `Race` +
      `Time to stool sample` +
      `Diet` + 
      `Enterococcus Domination`,
    data = cox_df
  ) %>%
  tbl_regression(
    exp = TRUE,
    pvalue_fun = function(x) {
      if_else(is.na(x), NA_character_, if_else(
        x < 0.001,
        format(x,
          digits = 3, scientific = TRUE
        ),
        format(round(x, 3),
          scientific = F
        )
      ))
    }
  ) %>%
  modify_footnote(everything() ~ NA, abbreviation = TRUE)

coxauc_ecoc %>%
  gtsummary::modify_caption("**Cox Proportional Hazards Regression**")
Cox Proportional Hazards Regression
Characteristic HR 95% CI p-value
Sex
    Female
    Male 1.31 0.66, 2.59 0.435
Age 0.99 0.97, 1.02 0.668
Charlson Comorbidity Index 1.12 0.97, 1.30 0.124
Acute respiratory distress syndrome
    No
    Yes 2.56 1.19, 5.49 0.016
Sepsis
    None
    Sepsis 1.79 0.68, 4.71 0.235
SOFA Score 1.06 0.98, 1.15 0.131
Race
    African American
    Other 1.95 0.51, 7.43 0.330
    White, non-Hispanic 2.68 1.29, 5.59 0.009
Time to stool sample 1.01 0.92, 1.11 0.828
Diet
    Diet
    NPO 1.93 0.91, 4.09 0.086
Enterococcus Domination
    Enterococcous Domination
    No Enterococcus Domination 0.61 0.31, 1.21 0.155
gt::gtsave(gtsummary::as_gt(coxauc_ecoc), file = "./Results/cox_model_SOFA_Enterococcus_30_Day_Mortality_train.png")

# Enterococcus Domination
coxauc_ebac <-
  coxph(
    Surv(cox_df$surv_days, cox_df$thirtyday_mortality_overall_class) ~
      `Sex` +
      `Age` +
      `Charlson Comorbidity Index` +
      `Acute respiratory distress syndrome` +
      `Sepsis` +
      `SOFA Score` +
      `Race` +
      `Time to stool sample` +
      `Diet` + +
      `Enterobacterales Domination`,
    data = cox_df
  ) %>%
  tbl_regression(
    exp = TRUE,
    pvalue_fun = function(x) {
      if_else(is.na(x), NA_character_, if_else(
        x < 0.001,
        format(x,
          digits = 3, scientific = TRUE
        ),
        format(round(x, 3),
          scientific = F
        )
      ))
    }
  ) %>%
  modify_footnote(everything() ~ NA, abbreviation = TRUE)

coxauc_ebac %>%
  gtsummary::modify_caption("**Cox Proportional Hazards Regression**")
Cox Proportional Hazards Regression
Characteristic HR 95% CI p-value
Sex
    Female
    Male 1.22 0.62, 2.40 0.558
Age 0.99 0.97, 1.02 0.510
Charlson Comorbidity Index 1.15 0.99, 1.33 0.064
Acute respiratory distress syndrome
    No
    Yes 2.59 1.20, 5.56 0.015
Sepsis
    None
    Sepsis 1.71 0.64, 4.53 0.283
SOFA Score 1.06 0.98, 1.15 0.115
Race
    African American
    Other 1.94 0.50, 7.52 0.339
    White, non-Hispanic 2.64 1.28, 5.44 0.009
Time to stool sample 1.02 0.93, 1.13 0.651
Diet
    Diet
    NPO 1.84 0.88, 3.86 0.108
Enterobacterales Domination
    Enterobacterales Domination
    No Enterobacterales Domination 0.95 0.47, 1.93 0.895
gt::gtsave(gtsummary::as_gt(coxauc_ebac), file = "./Results/cox_model_SOFA_Enterobacterales_30_Day_Mortality_train.png")

# Shannon Diversity
coxauc_shannon <-
  coxph(
    Surv(cox_df$surv_days, cox_df$thirtyday_mortality_overall_class) ~
      `Sex` +
      `Age` +
      `Charlson Comorbidity Index` +
      `Acute respiratory distress syndrome` +
      `Sepsis` +
      `SOFA Score` +
      `Race` +
      `Time to stool sample` +
      `Diet` + 
      `Shannon Diversity`,
    data = cox_df
  ) %>%
  tbl_regression(
    exp = TRUE,
    pvalue_fun = function(x) {
      if_else(is.na(x), NA_character_, if_else(
        x < 0.001,
        format(x,
          digits = 3, scientific = TRUE
        ),
        format(round(x, 3),
          scientific = F
        )
      ))
    }
  ) %>%
  modify_footnote(everything() ~ NA, abbreviation = TRUE)


coxauc_shannon %>%
  gtsummary::modify_caption("**Cox Proportional Hazards Regression**")
Cox Proportional Hazards Regression
Characteristic HR 95% CI p-value
Sex
    Female
    Male 1.23 0.63, 2.40 0.544
Age 0.99 0.96, 1.02 0.494
Charlson Comorbidity Index 1.15 1.00, 1.33 0.056
Acute respiratory distress syndrome
    No
    Yes 2.60 1.21, 5.57 0.014
Sepsis
    None
    Sepsis 1.72 0.65, 4.53 0.270
SOFA Score 1.07 0.98, 1.16 0.120
Race
    African American
    Other 1.96 0.51, 7.55 0.329
    White, non-Hispanic 2.65 1.27, 5.56 0.010
Time to stool sample 1.02 0.93, 1.13 0.655
Diet
    Diet
    NPO 1.83 0.87, 3.84 0.109
Shannon Diversity 1.02 0.74, 1.40 0.915
gt::gtsave(gtsummary::as_gt(coxauc_shannon), file = "./Results/cox_model_SOFA_Shannon_30_Day_Mortality_train.png")

Beta Diversity

thirtyday_mortality_overall_vector <-
  t_metaphlan_micu_nocovid_mat %>%
  rownames_to_column(var = "shotgunSeq_id") %>%
  select(shotgunSeq_id) %>%
  left_join(micu_new_nocovid_oc %>%
    select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
  column_to_rownames(var = "shotgunSeq_id") %>%
  pull(thirtyday_mortality_overall)

beta_dist <-
  vegdist(t_metaphlan_micu_nocovid_mat, index = "bray-curtis")

mds <-
  metaMDS(
    beta_dist,
    k = 3,
    distance = "bray-curtis",
    trymax = 500,
    wascores = TRUE
  )
## Run 0 stress 0.1722473 
## Run 1 stress 0.1702742 
## ... New best solution
## ... Procrustes: rmse 0.03972199  max resid 0.2464389 
## Run 2 stress 0.1729343 
## Run 3 stress 0.1732791 
## Run 4 stress 0.1746851 
## Run 5 stress 0.172584 
## Run 6 stress 0.1728354 
## Run 7 stress 0.1715155 
## Run 8 stress 0.1758227 
## Run 9 stress 0.1705667 
## ... Procrustes: rmse 0.03359764  max resid 0.1109341 
## Run 10 stress 0.1708368 
## Run 11 stress 0.1727566 
## Run 12 stress 0.1711217 
## Run 13 stress 0.174345 
## Run 14 stress 0.1715655 
## Run 15 stress 0.1743469 
## Run 16 stress 0.1743357 
## Run 17 stress 0.1719752 
## Run 18 stress 0.1731659 
## Run 19 stress 0.1735044 
## Run 20 stress 0.1726203 
## Run 21 stress 0.1732842 
## Run 22 stress 0.1700196 
## ... New best solution
## ... Procrustes: rmse 0.02110721  max resid 0.2226543 
## Run 23 stress 0.1711147 
## Run 24 stress 0.1723538 
## Run 25 stress 0.1706113 
## Run 26 stress 0.1728592 
## Run 27 stress 0.171748 
## Run 28 stress 0.1762772 
## Run 29 stress 0.1715985 
## Run 30 stress 0.1743403 
## Run 31 stress 0.1778621 
## Run 32 stress 0.1752725 
## Run 33 stress 0.1709985 
## Run 34 stress 0.1755264 
## Run 35 stress 0.1703259 
## ... Procrustes: rmse 0.01830155  max resid 0.1396967 
## Run 36 stress 0.1723814 
## Run 37 stress 0.1732575 
## Run 38 stress 0.1747133 
## Run 39 stress 0.1741563 
## Run 40 stress 0.1727719 
## Run 41 stress 0.1710834 
## Run 42 stress 0.170189 
## ... Procrustes: rmse 0.01875297  max resid 0.1484712 
## Run 43 stress 0.1705058 
## ... Procrustes: rmse 0.02085675  max resid 0.1477739 
## Run 44 stress 0.1732815 
## Run 45 stress 0.1747758 
## Run 46 stress 0.1748345 
## Run 47 stress 0.1713831 
## Run 48 stress 0.1754558 
## Run 49 stress 0.1710102 
## Run 50 stress 0.1701975 
## ... Procrustes: rmse 0.02022863  max resid 0.1744679 
## Run 51 stress 0.1777171 
## Run 52 stress 0.1720756 
## Run 53 stress 0.1700078 
## ... New best solution
## ... Procrustes: rmse 0.01014964  max resid 0.06002796 
## Run 54 stress 0.171115 
## Run 55 stress 0.170562 
## Run 56 stress 0.1766925 
## Run 57 stress 0.1728406 
## Run 58 stress 0.1723206 
## Run 59 stress 0.1773058 
## Run 60 stress 0.172621 
## Run 61 stress 0.171711 
## Run 62 stress 0.1709704 
## Run 63 stress 0.1715218 
## Run 64 stress 0.1750171 
## Run 65 stress 0.1743517 
## Run 66 stress 0.1752622 
## Run 67 stress 0.1707928 
## Run 68 stress 0.170567 
## Run 69 stress 0.1742744 
## Run 70 stress 0.1711493 
## Run 71 stress 0.1740048 
## Run 72 stress 0.1737491 
## Run 73 stress 0.1730335 
## Run 74 stress 0.1711353 
## Run 75 stress 0.1700615 
## ... Procrustes: rmse 0.01695885  max resid 0.1445767 
## Run 76 stress 0.1705812 
## Run 77 stress 0.1715728 
## Run 78 stress 0.1732692 
## Run 79 stress 0.1773399 
## Run 80 stress 0.1744572 
## Run 81 stress 0.1730212 
## Run 82 stress 0.170974 
## Run 83 stress 0.1736988 
## Run 84 stress 0.1699747 
## ... New best solution
## ... Procrustes: rmse 0.005361797  max resid 0.02835761 
## Run 85 stress 0.1756083 
## Run 86 stress 0.1728627 
## Run 87 stress 0.1737404 
## Run 88 stress 0.1713187 
## Run 89 stress 0.1755835 
## Run 90 stress 0.1738234 
## Run 91 stress 0.1762064 
## Run 92 stress 0.1699781 
## ... Procrustes: rmse 0.002278586  max resid 0.01769524 
## Run 93 stress 0.1784638 
## Run 94 stress 0.1705403 
## Run 95 stress 0.171656 
## Run 96 stress 0.1700537 
## ... Procrustes: rmse 0.01511268  max resid 0.1446036 
## Run 97 stress 0.1764051 
## Run 98 stress 0.170857 
## Run 99 stress 0.1728662 
## Run 100 stress 0.1734832 
## Run 101 stress 0.1701244 
## ... Procrustes: rmse 0.01862166  max resid 0.214643 
## Run 102 stress 0.1744837 
## Run 103 stress 0.1703833 
## ... Procrustes: rmse 0.0237706  max resid 0.1012823 
## Run 104 stress 0.1711758 
## Run 105 stress 0.1702059 
## ... Procrustes: rmse 0.02184254  max resid 0.1537412 
## Run 106 stress 0.1715203 
## Run 107 stress 0.1749778 
## Run 108 stress 0.1722346 
## Run 109 stress 0.1706319 
## Run 110 stress 0.1700037 
## ... Procrustes: rmse 0.008241567  max resid 0.04852925 
## Run 111 stress 0.1703143 
## ... Procrustes: rmse 0.007686657  max resid 0.06850546 
## Run 112 stress 0.1730505 
## Run 113 stress 0.170992 
## Run 114 stress 0.1709716 
## Run 115 stress 0.1748787 
## Run 116 stress 0.1702792 
## ... Procrustes: rmse 0.01882247  max resid 0.2121448 
## Run 117 stress 0.1738115 
## Run 118 stress 0.1752374 
## Run 119 stress 0.1699865 
## ... Procrustes: rmse 0.003813678  max resid 0.02398523 
## Run 120 stress 0.1734579 
## Run 121 stress 0.1719885 
## Run 122 stress 0.1740244 
## Run 123 stress 0.1804941 
## Run 124 stress 0.1745863 
## Run 125 stress 0.1722428 
## Run 126 stress 0.1720872 
## Run 127 stress 0.170202 
## ... Procrustes: rmse 0.006101629  max resid 0.05039583 
## Run 128 stress 0.1750168 
## Run 129 stress 0.1712103 
## Run 130 stress 0.175864 
## Run 131 stress 0.1701525 
## ... Procrustes: rmse 0.006237044  max resid 0.05289448 
## Run 132 stress 0.1725981 
## Run 133 stress 0.1709331 
## Run 134 stress 0.1702036 
## ... Procrustes: rmse 0.02174753  max resid 0.1514201 
## Run 135 stress 0.1706209 
## Run 136 stress 0.176272 
## Run 137 stress 0.1720052 
## Run 138 stress 0.1740328 
## Run 139 stress 0.1715957 
## Run 140 stress 0.1752308 
## Run 141 stress 0.1756965 
## Run 142 stress 0.1759356 
## Run 143 stress 0.1716868 
## Run 144 stress 0.170193 
## ... Procrustes: rmse 0.02139114  max resid 0.1491494 
## Run 145 stress 0.1717959 
## Run 146 stress 0.1732194 
## Run 147 stress 0.1728868 
## Run 148 stress 0.1738484 
## Run 149 stress 0.1736011 
## Run 150 stress 0.1717744 
## Run 151 stress 0.1720742 
## Run 152 stress 0.1709178 
## Run 153 stress 0.1724738 
## Run 154 stress 0.1720453 
## Run 155 stress 0.1758663 
## Run 156 stress 0.1715011 
## Run 157 stress 0.1705807 
## Run 158 stress 0.1773559 
## Run 159 stress 0.1708634 
## Run 160 stress 0.1765719 
## Run 161 stress 0.17245 
## Run 162 stress 0.1716623 
## Run 163 stress 0.1704994 
## Run 164 stress 0.1730055 
## Run 165 stress 0.1718498 
## Run 166 stress 0.1718352 
## Run 167 stress 0.1720521 
## Run 168 stress 0.171151 
## Run 169 stress 0.1767999 
## Run 170 stress 0.1733703 
## Run 171 stress 0.1728102 
## Run 172 stress 0.17283 
## Run 173 stress 0.1713246 
## Run 174 stress 0.1730943 
## Run 175 stress 0.1735978 
## Run 176 stress 0.1721793 
## Run 177 stress 0.1722702 
## Run 178 stress 0.1720106 
## Run 179 stress 0.1746104 
## Run 180 stress 0.1714091 
## Run 181 stress 0.1710962 
## Run 182 stress 0.169984 
## ... Procrustes: rmse 0.003503023  max resid 0.02379281 
## Run 183 stress 0.1706108 
## Run 184 stress 0.176636 
## Run 185 stress 0.1738489 
## Run 186 stress 0.1739459 
## Run 187 stress 0.1706949 
## Run 188 stress 0.1709726 
## Run 189 stress 0.1732765 
## Run 190 stress 0.1708374 
## Run 191 stress 0.1702011 
## ... Procrustes: rmse 0.01721344  max resid 0.1253423 
## Run 192 stress 0.1702548 
## ... Procrustes: rmse 0.01062077  max resid 0.05202653 
## Run 193 stress 0.1706777 
## Run 194 stress 0.1752439 
## Run 195 stress 0.1703838 
## ... Procrustes: rmse 0.02380127  max resid 0.1014419 
## Run 196 stress 0.1765399 
## Run 197 stress 0.1751225 
## Run 198 stress 0.1707002 
## Run 199 stress 0.1709169 
## Run 200 stress 0.1712566 
## Run 201 stress 0.1722115 
## Run 202 stress 0.1708089 
## Run 203 stress 0.1716324 
## Run 204 stress 0.1747587 
## Run 205 stress 0.1714984 
## Run 206 stress 0.1700038 
## ... Procrustes: rmse 0.008274039  max resid 0.04814246 
## Run 207 stress 0.1726895 
## Run 208 stress 0.1750842 
## Run 209 stress 0.1731513 
## Run 210 stress 0.1725067 
## Run 211 stress 0.1740056 
## Run 212 stress 0.1732344 
## Run 213 stress 0.1736555 
## Run 214 stress 0.1716636 
## Run 215 stress 0.1701807 
## ... Procrustes: rmse 0.01659358  max resid 0.09029175 
## Run 216 stress 0.1718393 
## Run 217 stress 0.1771884 
## Run 218 stress 0.1706198 
## Run 219 stress 0.1701933 
## ... Procrustes: rmse 0.02071992  max resid 0.127496 
## Run 220 stress 0.1754697 
## Run 221 stress 0.1705399 
## Run 222 stress 0.1733619 
## Run 223 stress 0.1760203 
## Run 224 stress 0.1706195 
## Run 225 stress 0.1723076 
## Run 226 stress 0.1757544 
## Run 227 stress 0.1729524 
## Run 228 stress 0.1701182 
## ... Procrustes: rmse 0.01836661  max resid 0.2128916 
## Run 229 stress 0.1725693 
## Run 230 stress 0.1728447 
## Run 231 stress 0.1732277 
## Run 232 stress 0.1712754 
## Run 233 stress 0.170544 
## Run 234 stress 0.1718 
## Run 235 stress 0.1703939 
## ... Procrustes: rmse 0.01766559  max resid 0.08661864 
## Run 236 stress 0.1719521 
## Run 237 stress 0.170573 
## Run 238 stress 0.1715227 
## Run 239 stress 0.1735132 
## Run 240 stress 0.1737197 
## Run 241 stress 0.1720843 
## Run 242 stress 0.171207 
## Run 243 stress 0.1768278 
## Run 244 stress 0.1754863 
## Run 245 stress 0.1734471 
## Run 246 stress 0.1727699 
## Run 247 stress 0.1734261 
## Run 248 stress 0.1752161 
## Run 249 stress 0.1735218 
## Run 250 stress 0.1712759 
## Run 251 stress 0.1775293 
## Run 252 stress 0.1740275 
## Run 253 stress 0.1706033 
## Run 254 stress 0.1742523 
## Run 255 stress 0.1737158 
## Run 256 stress 0.1704207 
## ... Procrustes: rmse 0.01325717  max resid 0.0985564 
## Run 257 stress 0.1708013 
## Run 258 stress 0.172435 
## Run 259 stress 0.1710192 
## Run 260 stress 0.1712018 
## Run 261 stress 0.1733337 
## Run 262 stress 0.1707287 
## Run 263 stress 0.1721707 
## Run 264 stress 0.1709771 
## Run 265 stress 0.1748158 
## Run 266 stress 0.1701524 
## ... Procrustes: rmse 0.006193597  max resid 0.05287127 
## Run 267 stress 0.1768127 
## Run 268 stress 0.170566 
## Run 269 stress 0.1706211 
## Run 270 stress 0.170784 
## Run 271 stress 0.1714662 
## Run 272 stress 0.1735238 
## Run 273 stress 0.1707613 
## Run 274 stress 0.1728224 
## Run 275 stress 0.1710416 
## Run 276 stress 0.1713201 
## Run 277 stress 0.1722541 
## Run 278 stress 0.1707322 
## Run 279 stress 0.1726228 
## Run 280 stress 0.1734957 
## Run 281 stress 0.1719486 
## Run 282 stress 0.1711445 
## Run 283 stress 0.1705173 
## Run 284 stress 0.1701798 
## ... Procrustes: rmse 0.01658841  max resid 0.08923627 
## Run 285 stress 0.1711715 
## Run 286 stress 0.1702002 
## ... Procrustes: rmse 0.02162893  max resid 0.1486377 
## Run 287 stress 0.1717969 
## Run 288 stress 0.1719241 
## Run 289 stress 0.1706391 
## Run 290 stress 0.17058 
## Run 291 stress 0.170882 
## Run 292 stress 0.1759791 
## Run 293 stress 0.1723833 
## Run 294 stress 0.1705198 
## Run 295 stress 0.1711688 
## Run 296 stress 0.1715535 
## Run 297 stress 0.1715331 
## Run 298 stress 0.1706106 
## Run 299 stress 0.1699756 
## ... Procrustes: rmse 0.0003840711  max resid 0.003440156 
## ... Similar to previous best
## *** Best solution repeated 1 times
mds_data <- as.data.frame(mds$points)

# Shepards test/goodness of fit
goodness(mds) # Produces a results of test statistics for goodness of fit for each point
##   [1] 0.014328243 0.017516017 0.012681497 0.012439571 0.021978178 0.009925870
##   [7] 0.015899337 0.012455642 0.013289336 0.012963721 0.011682512 0.011388040
##  [13] 0.009089511 0.016292959 0.008950695 0.020070508 0.015325680 0.010430868
##  [19] 0.021821631 0.014046633 0.012199612 0.010841627 0.013883265 0.010736957
##  [25] 0.011156176 0.014118498 0.013114384 0.020079759 0.017145565 0.017397673
##  [31] 0.015789036 0.011700551 0.015360874 0.011361847 0.014379501 0.018433752
##  [37] 0.021195611 0.018091415 0.010911864 0.010453092 0.015464516 0.018218571
##  [43] 0.015562369 0.009305557 0.014701555 0.014053044 0.020655665 0.015937071
##  [49] 0.012238561 0.011738923 0.013874058 0.013896004 0.013977869 0.015556463
##  [55] 0.017815472 0.008771115 0.011955647 0.009586444 0.016649113 0.010939108
##  [61] 0.012914889 0.015349978 0.010770203 0.010850206 0.015439913 0.011672817
##  [67] 0.018228959 0.008030550 0.018455421 0.013841054 0.011205608 0.008218574
##  [73] 0.010053666 0.013908659 0.016991945 0.018551974 0.019113293 0.014119880
##  [79] 0.011242772 0.012165057 0.012349244 0.008830665 0.016481569 0.013024059
##  [85] 0.013303992 0.013833129 0.009707936 0.010945866 0.013181123 0.012816216
##  [91] 0.009520834 0.013331180 0.010432107 0.013223194 0.009171179 0.011446427
##  [97] 0.018951196 0.012838568 0.015373922 0.021980095 0.015609642 0.012459374
## [103] 0.014486906 0.016147518 0.010355512 0.013025315 0.015836406 0.012888968
## [109] 0.011338770 0.014605197 0.012234962 0.008032287 0.012125891 0.009630490
## [115] 0.011296588 0.011202783 0.011510972 0.018090253 0.015544059 0.014832642
## [121] 0.009621382 0.011776294 0.010624812 0.014045141 0.014825058 0.016763251
## [127] 0.011694207 0.014036016 0.013556851 0.016954474 0.010830190 0.018719475
## [133] 0.009576698 0.010968153 0.012862720 0.018092852 0.016624552 0.015987193
## [139] 0.008032287 0.010025300 0.019732998 0.011097285 0.010240149 0.012236275
## [145] 0.010270387 0.016384522 0.014769182
stressplot(mds) # Produces a Shepards diagram

# Stats: Homogeneity of dispersion test
dispersion <-
  permutest(betadisper(beta_dist, thirtyday_mortality_overall_vector)) # No significant difference in dispersion between Survivor and Non-Survivor

dispersion_pval <- dispersion$tab$`Pr(>F)`[1]

# Stats: PERMANOVA
set.seed(123)
mds_stats <-
  adonis2(
    beta_dist ~ thirtyday_mortality_overall_vector,
    method = "bray-curtis",
    permutations = 999
  )
mds_pval <- mds_stats$`Pr(>F)`[1]

# Stats: Pairwise analysis
pair_mod <-
  pairwise.adonis(beta_dist, factors = thirtyday_mortality_overall_vector, p.adjust.m = "BH")
pair_mod
##                      pairs Df SumsOfSqs  F.Model         R2 p.value p.adjusted
## 1 Non-Survivor vs Survivor  1  1.047949 2.606943 0.01766138   0.005      0.005
##   sig
## 1   *
mds_data2 <- mds_data %>%
  rownames_to_column(var = "shotgunSeq_id") %>%
  left_join(micu_new_nocovid_oc %>%
    select(shotgunSeq_id, thirtyday_mortality_overall))

ggplot_mds <-
  ggplot(
    mds_data2,
    aes(
      x = MDS1,
      y = MDS2,
      color = thirtyday_mortality_overall,
      fill = thirtyday_mortality_overall
    )
  ) +
  stat_ellipse(
    level = 0.1,
    geom = "polygon",
    alpha = 0.35,
    type = "euclid"
  ) +
  geom_point(alpha = 0.65, size = 10) +
  theme_bw() +
  theme(
    axis.title = et(color = "black", size = 72),
    axis.text = et(color = "black", size = 60),
    # plot.subtitle = et(color = "black", size = 79),
    panel.grid.minor = eb(),
    panel.grid.major = eb(),
    legend.position = "none",
    plot.margin = margin(
      # Top margin
      t = 5,
      # Right margin
      r = 5,
      # Bottom margin
      b = 5,
      # Left margin
      l = 5 
    )
  ) +
  annotate(
    "text",
    x = -0.4,
    y = 1.5,
    hjust = 0,
    size = 18,
    label = paste0(
      "BetaDisper = ", dispersion_pval, "\n",
      "PERMANOVA, p = ", mds_pval
    )
  ) +
  labs(
    y = "MDS2",
    x = "MDS1"
  ) +
  ggsci::scale_color_lancet() +
  ggsci::scale_fill_lancet() +
  guides(
    fill = guide_legend("Outcome"),
    color = guide_legend("Outcome")
  ) +
  coord_equal(
    ylim = c(-1.2, 1.7),
    xlim = c(-1.2, 1.7)
  )

ggplot_mds

ggsave(
  plot = ggplot_mds,
  filename = "./Results/Beta_Diversity_BrayCurtis_train.pdf",
  height = 14,
  width = 14,
  units = "in"
)

Qual Metabolites UMAP

umap_metab_qual <- micu_new_nocovid_oc %>%
  left_join(metab_qual_imp_tot) %>%
  group_by(compound) %>%
  mutate(n = sum(is.na(mvalue))) %>%
  ungroup() %>%
  mutate(p = length(unique(metabolomicsID)))

umap_metab_qual_mat <- umap_metab_qual %>%
  select(metabolomicsID, compound, mvalue) %>%
  group_by(compound) %>%
  mutate(zscore = (mvalue - mean(mvalue, na.rm = TRUE)) / sd(mvalue, na.rm = TRUE)) %>%
  pivot_wider(metabolomicsID,
    names_from = "compound",
    values_from = "zscore"
  ) %>%
  purrr::discard(~ all(is.nan(.))) %>%
  column_to_rownames(var = "metabolomicsID") %>%
  janitor::remove_constant(.)

custom_config <- umap.defaults
custom_config$n_neighbors <-
  as.integer(nrow(umap_metab_qual_mat) * 0.1)
custom_config$random_state <- 123
custom_config$metric <- "manhattan"
custom_config$n_epochs <- 1000
custom_config$min_dist <- 0.1

umap_metab_qual_mat2 <-
  umap(umap_metab_qual_mat, config = custom_config)

umap_metab_qual_plot <- umap_metab_qual_mat2$layout %>%
  as.data.frame() %>%
  mutate(metabolomicsID = row.names(.)) %>%
  left_join(
    umap_metab_qual %>%
      group_by(metabolomicsID, thirtyday_mortality_overall) %>%
      dplyr::slice(1) %>%
      select(metabolomicsID, thirtyday_mortality_overall)
  ) %>%
  ggplot(aes(x = V1, y = V2, color = thirtyday_mortality_overall)) +
  geom_point(alpha = 0.65, size = 3.25) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title = et(color = "black", size = 14),
    axis.text = et(color = "black", size = 12),
    legend.title = et(color = "black", size = 14),
    legend.text = et(color = "black", size = 12)
  ) +
  ggtitle(
    paste0(
      "Qualitative Metabolomics: UMAP \nSurvivor vs Non-Survivor \n",
      "n = ",
      nrow(umap_metab_qual_mat),
      "\n",
      custom_config$n_neighbors,
      " Neighbors"
    )
  ) +
  xlab("UMAP1") +
  ylab("UMAP2") +
  guides(
    color = guide_legend(title = "Outcome"),
    fill = guide_legend(title = "Outcome")
  ) +
  ggsci::scale_color_lancet() +
  ggsci::scale_fill_lancet()

umap_metab_qual_plot

ggsave(
  plot = umap_metab_qual_plot,
  filename = "./Results/Qual_Metab_UMAP_30_Day_Mortality_train.pdf",
  height = 8,
  width = 10,
  units = "in"
)

Qual Metabolites PCA

# PCA dataframe
pca_metab_qual_mat <- umap_metab_qual %>%
  select(metabolomicsID, thirtyday_mortality_overall, compound, mvalue) %>%
  group_by(compound) %>%
  mutate(zscore = (mvalue - mean(mvalue, na.rm = TRUE)) / sd(mvalue, na.rm = TRUE)) %>%
  pivot_wider(
    id_cols = c(metabolomicsID, thirtyday_mortality_overall),
    names_from = "compound",
    values_from = "zscore"
  ) %>%
  purrr::discard(~ all(is.nan(.))) %>%
  column_to_rownames(var = "metabolomicsID") %>%
  janitor::remove_constant()

# PCA on correlation matrix
pca_res <-
  prcomp(pca_metab_qual_mat[, -1], center = FALSE, scale = FALSE)

summary(pca_res)
## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.6511 2.54681 2.44266 2.31590 1.92628 1.73952 1.68448
## Proportion of Variance 0.1606 0.07815 0.07189 0.06462 0.04471 0.03646 0.03419
## Cumulative Proportion  0.1606 0.23876 0.31064 0.37526 0.41997 0.45643 0.49061
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     1.60363 1.54173 1.52089 1.45934 1.41766 1.34467 1.28241
## Proportion of Variance 0.03098 0.02864 0.02787 0.02566 0.02421 0.02178 0.01981
## Cumulative Proportion  0.52160 0.55023 0.57810 0.60376 0.62797 0.64976 0.66957
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     1.24167 1.18619 1.15842 1.13762 1.12421 1.10958 1.07550
## Proportion of Variance 0.01858 0.01695 0.01617 0.01559 0.01523 0.01483 0.01394
## Cumulative Proportion  0.68815 0.70510 0.72127 0.73686 0.75209 0.76692 0.78086
##                           PC22    PC23   PC24    PC25    PC26    PC27    PC28
## Standard deviation     1.03520 1.01536 1.0064 0.98318 0.97183 0.90282 0.89062
## Proportion of Variance 0.01291 0.01242 0.0122 0.01165 0.01138 0.00982 0.00956
## Cumulative Proportion  0.79377 0.80619 0.8184 0.83004 0.84142 0.85124 0.86080
##                           PC29    PC30    PC31    PC32    PC33    PC34    PC35
## Standard deviation     0.85413 0.83269 0.80412 0.78972 0.75485 0.74079 0.71580
## Proportion of Variance 0.00879 0.00835 0.00779 0.00751 0.00686 0.00661 0.00617
## Cumulative Proportion  0.86959 0.87794 0.88573 0.89324 0.90011 0.90672 0.91289
##                           PC36    PC37    PC38    PC39    PC40    PC41    PC42
## Standard deviation     0.69428 0.68104 0.66875 0.64352 0.62875 0.60195 0.59572
## Proportion of Variance 0.00581 0.00559 0.00539 0.00499 0.00476 0.00437 0.00428
## Cumulative Proportion  0.91870 0.92429 0.92968 0.93467 0.93943 0.94380 0.94807
##                           PC43    PC44    PC45    PC46    PC47   PC48    PC49
## Standard deviation     0.59284 0.58234 0.56646 0.53530 0.51416 0.4905 0.48317
## Proportion of Variance 0.00423 0.00409 0.00387 0.00345 0.00319 0.0029 0.00281
## Cumulative Proportion  0.95231 0.95639 0.96026 0.96371 0.96690 0.9698 0.97261
##                           PC50    PC51    PC52    PC53    PC54    PC55   PC56
## Standard deviation     0.44907 0.43308 0.41981 0.40313 0.38160 0.36702 0.3641
## Proportion of Variance 0.00243 0.00226 0.00212 0.00196 0.00175 0.00162 0.0016
## Cumulative Proportion  0.97504 0.97730 0.97942 0.98138 0.98313 0.98475 0.9863
##                           PC57    PC58    PC59    PC60   PC61    PC62    PC63
## Standard deviation     0.33655 0.32349 0.30821 0.30306 0.2877 0.26160 0.25361
## Proportion of Variance 0.00136 0.00126 0.00114 0.00111 0.0010 0.00082 0.00077
## Cumulative Proportion  0.98772 0.98898 0.99012 0.99123 0.9922 0.99305 0.99383
##                           PC64    PC65    PC66    PC67    PC68    PC69    PC70
## Standard deviation     0.25059 0.23569 0.22418 0.21218 0.20844 0.19770 0.18923
## Proportion of Variance 0.00076 0.00067 0.00061 0.00054 0.00052 0.00047 0.00043
## Cumulative Proportion  0.99458 0.99525 0.99586 0.99640 0.99692 0.99739 0.99783
##                           PC71    PC72    PC73    PC74    PC75    PC76    PC77
## Standard deviation     0.17314 0.16975 0.15394 0.13510 0.13329 0.12599 0.10324
## Proportion of Variance 0.00036 0.00035 0.00029 0.00022 0.00021 0.00019 0.00013
## Cumulative Proportion  0.99819 0.99853 0.99882 0.99904 0.99925 0.99944 0.99957
##                           PC78    PC79    PC80    PC81    PC82    PC83
## Standard deviation     0.10035 0.08983 0.07560 0.07219 0.06378 0.04845
## Proportion of Variance 0.00012 0.00010 0.00007 0.00006 0.00005 0.00003
## Cumulative Proportion  0.99969 0.99979 0.99986 0.99992 0.99997 1.00000
# Scree plot
fviz_eig(pca_res, addlabels = TRUE)

# Biplot
fviz_pca_var(pca_res, col.var = "black", )

# Variable contribution plot
fviz_cos2(pca_res, choice = "var", axes = 1:2)

# Biplot plus Cos2 values
fviz_pca_var(
  pca_res,
  col.var = "cos2",
  gradient.cols = c("black", "orange", "green"),
  repel = TRUE
)

# Biplot plus contributions values
gg_pca_qual_vars <-
  fviz_pca_var(
    pca_res,
    col.var = "contrib",
    gradient.cols = c("white", "blue", "red"),
    ggtheme = theme_minimal(),
    repel = TRUE
  ) +
  theme(
    panel.grid = eb(),
    axis.text = et(size = 12, color = "black"),
    axis.title = et(size = 14, color = "black"),
    legend.title = et(size = 14, color = "black"),
    legend.text = et(size = 12, color = "black")
  ) +
  scale_x_continuous(expand = expansion(mult = c(0.25, 0.25))) +
  scale_y_continuous(expand = expansion(mult = c(0.25, 0.25))) +
  labs(color = "Contribution")

# Color individuals by outcome
gg_pca_qual_ind <-
  fviz_pca_ind(
    pca_res,
    label = "none",
    habillage = pca_metab_qual_mat$thirtyday_mortality_overall,
    addEllipses = TRUE,
    ellipse.level = 0.95,
    ggtheme = theme_minimal(),
  ) +
  theme(
    panel.grid = eb(),
    axis.text = et(size = 12, color = "black"),
    axis.title = et(size = 14, color = "black"),
    legend.title = et(size = 14, color = "black"),
    legend.text = et(size = 12, color = "black")
  ) +
  ggsci::scale_color_lancet() +
  ggsci::scale_fill_lancet()

pdf(
  file = "./Results/Qual_Metab_PCA_train.pdf",
  height = 8,
  width = 24
)
cowplot::plot_grid(gg_pca_qual_ind, gg_pca_qual_vars)
invisible(invisible(dev.off()))

Qual Metabolites Volcano

qual_log2fc <- umap_metab_qual %>%
  select(metabolomicsID, compound, mvalue, thirtyday_mortality_overall) %>%
  mutate(mvalue = ifelse(is.na(mvalue), 0, mvalue)) %>%
  group_by(compound) %>%
  filter(any(mvalue != 0)) %>%
  summarise(log2fc_val = log((
    mean(mvalue[thirtyday_mortality_overall == "Survivor"], na.rm = T) / mean(mvalue[thirtyday_mortality_overall == "Non-Survivor"], na.rm = T)
  ), base = 2)) %>%
  filter(compound != "pre-q1")

qual_pval <- umap_metab_qual %>%
  select(metabolomicsID, compound, mvalue, thirtyday_mortality_overall) %>%
  mutate(mvalue = ifelse(is.na(mvalue), 0, mvalue)) %>%
  group_by(compound) %>%
  filter(any(mvalue != 0)) %>%
  rstatix::wilcox_test(mvalue ~ thirtyday_mortality_overall) %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  rstatix::add_significance("p.adj")

qual_tot <- left_join(qual_log2fc, qual_pval) %>%
  column_to_rownames(var = "compound")

write.csv(qual_tot, "./Results/volcano_list_train.csv")

# volcano label color
volcano_labcol <- qual_tot %>%
  filter(p.adj <= 0.1 & abs(log2fc_val) >= 0.75) %>%
  mutate(color = ifelse(
    log2fc_val > 0,
    ggsci::pal_lancet(palette = "lanonc")(2)[1],
    ggsci::pal_lancet(palette = "lanonc")(2)[2]
  ))

# Volcano Plot (adjusted)
set.seed(123)
volcano_adj <-
  EnhancedVolcano(
    qual_tot,
    lab = rownames(qual_tot),
    x = "log2fc_val",
    y = "p.adj",
    title = NULL,
    pCutoff = 0.1,
    FCcutoff = 0.75,
    pointSize = 6,
    labSize = 8,
    axisLabSize = 32,
    labCol = volcano_labcol$color,
    caption = NULL,
    colAlpha = 0.65,
    col = c("gray85", c("grey40", "grey10", "#F27DFA")),
    legendPosition = "bottom",
    legendLabels = c(
      expression(p.adj > 0.1 * ";" ~ Log[2] ~ FC < "\u00B1" * 0.75),
      expression(p.adj > 0.1 * ";" ~ Log[2] ~ FC >= "\u00B1" *
        0.75),
      expression(p.adj <= 0.1 * ";" ~ Log[2] ~ FC < "\u00B1" *
        0.75),
      expression(p.adj <= 0.1 * ";" ~ Log[2] ~ FC >= "\u00B1" *
        0.75)
    ),
    legendLabSize = 14,
    boxedLabels = TRUE,
    drawConnectors = TRUE,
    widthConnectors = 0.2,
    arrowheads = FALSE,
    gridlines.minor = FALSE,
    gridlines.major = FALSE,
    max.overlaps = Inf,
    min.segment.length = 0.5
  ) +
  theme(
    axis.text = et(color = "black"),
    legend.text = et(hjust = 0, size = 18),
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  ) +
  labs(subtitle = NULL) +
  annotate(
    "segment",
    x = 0.8,
    xend = 2.5,
    y = 2.3,
    yend = 2.3,
    arrow = arrow(),
    size = 2,
    color = ggsci::pal_lancet(palette = "lanonc")(2)[1]
  ) +
  annotate(
    "text",
    x = 0.8,
    y = 2.4,
    hjust = 0,
    label = "Survivor",
    size = 12,
    color = ggsci::pal_lancet(palette = "lanonc")(2)[1]
  ) +
  annotate(
    "rect",
    xmin = 0.75,
    xmax = Inf,
    ymin = -log(0.1, base = 10),
    ymax = Inf,
    alpha = .1,
    fill = ggsci::pal_lancet(palette = "lanonc")(2)[1]
  ) +

  annotate(
    "segment",
    x = -0.8,
    xend = -2.5,
    y = 2.3,
    yend = 2.3,
    arrow = arrow(),
    size = 2,
    color = ggsci::pal_lancet(palette = "lanonc")(2)[2]
  ) +
  annotate(
    "text",
    x = -1.55,
    y = 2.4,
    hjust = 0.5,
    label = "Non-Survivor",
    size = 12,
    color = ggsci::pal_lancet(palette = "lanonc")(2)[2]
  ) +
  annotate(
    "rect",
    xmin = -0.75,
    xmax = -Inf,
    ymin = -log(0.1, base = 10),
    ymax = Inf,
    alpha = .1,
    fill = ggsci::pal_lancet(palette = "lanonc")(2)[2]
  ) +
  guides(
    color = guide_legend(nrow = 4),
    shape = guide_legend(nrow = 4)
  ) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.05)),
    limits = c(0, 2.4),
    breaks = seq(0, 2, 0.5)
  )

volcano_adj

ggsave(
  plot = volcano_adj,
  filename = "./Results/Qual_Metab_Volcano_30_Day_Mortality_train.pdf",
  width = 24,
  height = 14
)

Metaphlan Relative Abundance, Alpha Diversity, and Enterococcus/Enterobacterales Relative Abundance

metaphlan_df2 <- t_metaphlan_micu_nocovid %>%
  left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
  drop_na(taxid) %>%
  arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
  mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus)) %>%
  left_join(alpha_shannon) %>%
  group_by(shotgunSeq_id) %>%
  arrange(Genus) %>%
  mutate(
    cum.pct = cumsum(pctseqs),
    y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)])) / 2
  ) %>%
  ungroup() %>%
  dplyr::select(-cum.pct)

metaphlan_pal <- getRdpPal(metaphlan_df2)

gg_metaphlan <- t_metaphlan_micu_nocovid %>%
  left_join(
    micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall) %>% mutate(
      thirtyday_mortality_overall = factor(
        thirtyday_mortality_overall,
        levels = c("Non-Survivor", "Survivor")
      )
    )
  ) %>%
  left_join(taxdmp %>% mutate(taxid = as.character(taxid))) %>%
  drop_na(taxid) %>%
  arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
  mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-", Genus)) %>%
  left_join(alpha_shannon) %>%
  group_by(shotgunSeq_id) %>%
  mutate(
    cum.pct = cumsum(pctseqs),
    y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)])) / 2
  ) %>%
  ungroup() %>%
  mutate(Genus = factor(Genus, levels = unique(Genus))) %>%
  group_by(shotgunSeq_id) %>%
  arrange(Genus) %>%
  ggplot(aes(x = reorder(shotgunSeq_id, Shannon), y = pctseqs)) +
  geom_bar(stat = "identity", aes(fill = Genus), width = 0.9) +
  scale_fill_manual(values = metaphlan_pal) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = eb(),
    axis.ticks.x = eb(),
    strip.text.x = et(angle = 0, size = 12),
    strip.background = eb(),
    axis.title.y = et(color = "black", size = 14),
    axis.text.y = et(color = "black", size = 12),
    panel.spacing = unit(0.5, "lines"),
    plot.margin = margin(
      t = 5,
      r = 5,
      b = 0,
      l = 5
    )
  ) +
  facet_grid(. ~ thirtyday_mortality_overall,
    scales = "free",
    space = "free_x"
  ) +
  scale_y_continuous(
    expand = expansion(mult = 0.005),
    labels = scales::percent_format(accuracy = 1)
  ) +
  scale_x_discrete(expand = expansion(add = 1)) +
  ylab("MetaPhlAn4 Relative Abundance\n") +
  xlab("")

gg_metaphlan

pdf(
  "./Results/Metaphlan_Relative_Abundance_train.pdf",
  height = 6,
  width = 12
)
gg_metaphlan
invisible(dev.off())

#### Alpha Diversity Plot Richness START ####
mat_filt <- t_metaphlan_micu_nocovid %>%
  pivot_wider(
    shotgunSeq_id,
    names_from = "taxid",
    values_from = "pctseqs",
    values_fill = 0
  ) %>%
  as.data.frame()

# Obtain stats for alpha diversity (Shannnon)
alpha_shannon_stats <-
  alpha_shannon %>%
  left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
  rstatix::wilcox_test(Shannon ~ thirtyday_mortality_overall)

pirate_colors <- rev(ggsci::pal_igv("default")(2))

set.seed(456)
gg_alpha_shannon <- alpha_shannon %>%
  left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
  mutate(
    thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
    thirtyday_mortality_overall = factor(
      thirtyday_mortality_overall,
      levels = c("Survivor", "Non-Survivor")
    )
  ) %>%
  ggplot(
    .,
    aes(
      x = thirtyday_mortality_overall,
      y = Shannon,
      colour = thirtyday_mortality_overall,
      fill = thirtyday_mortality_overall
    )
  ) +
  geom_pirate(
    cis_params = list(fill = "white", alpha = 0.5),
    bars_params = list(alpha = 0.65),
    lines_params = list(size = 0.5),
    points_params = list(fill = "black", size = 3.5),
    jitter_width = 0.75,
    cis = TRUE,
    violins = FALSE
  ) +
  annotate(
    "text",
    x = 1.5,
    y = 5,
    label = paste0(
      "Wilcoxon, W = ",
      alpha_shannon_stats$statistic,
      ", p = ",
      alpha_shannon_stats$p
    ),
    size = 8
  ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 30, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 25, color = "black"),
    plot.margin = margin(
      # Top margin
      t = 5,
      # Right margin
      r = 5,
      # Bottom margin
      b = 5,
      # Left margin
      l = 5
    ) 
  ) +
  ylab("Alpha Diversity\n(Shannon Index)\n") +
  scale_fill_manual(values = rev(pirate_colors)) +
  scale_color_manual(values = rev(pirate_colors)) +
  scale_y_continuous(breaks = seq(0, 5, 1))

# Figure 1B: Top Left Panel
gg_alpha_shannon + ggtitle("Figure 1B: Left Panel")

pdf("./Results/Pirate_Shannon_train.pdf",
  height = 6,
  width = 7
)
gg_alpha_shannon
invisible(dev.off())

#### Species Richness START ####
mat_richness <- mat_filt
row.names(mat_richness) <- mat_richness$shotgunSeq_id
mat_richness <- mat_richness %>% select(-shotgunSeq_id)

mat_richness_t <- mat_richness %>% t()

alpha_richness <- vegan::specnumber(mat_richness) %>%
  as.data.frame()
colnames(alpha_richness)[1] <- "Richness"
alpha_richness$shotgunSeq_id <- row.names(alpha_richness)

# Obtain values for mean alpha diversity for Survivor and Non-Survivor
alpha_richness %>%
  left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
  group_by(thirtyday_mortality_overall) %>%
  summarise(mean = mean(Richness))
## # A tibble: 2 × 2
##   thirtyday_mortality_overall  mean
##   <fct>                       <dbl>
## 1 Survivor                     59.4
## 2 Non-Survivor                 44.8
# Obtain stats for species richness
alpha_richness_stats <-
  alpha_richness %>%
  left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
  mutate(
    thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
    thirtyday_mortality_overall = factor(
      thirtyday_mortality_overall,
      levels = c("Survivor", "Non-Survivor")
    )
  ) %>%
  rstatix::wilcox_test(Richness ~ thirtyday_mortality_overall)

set.seed(456)
gg_alpha_richness <- alpha_richness %>%
  left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
  mutate(
    thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
    thirtyday_mortality_overall = factor(
      thirtyday_mortality_overall,
      levels = c("Survivor", "Non-Survivor")
    )
  ) %>%
  ggplot(
    .,
    aes(
      x = thirtyday_mortality_overall,
      y = Richness,
      colour = thirtyday_mortality_overall,
      fill = thirtyday_mortality_overall
    )
  ) +
  geom_pirate(
    cis_params = list(fill = "white", alpha = 0.5),
    bars_params = list(alpha = 0.65),
    lines_params = list(size = 0.5),
    points_params = list(fill = "black", size = 3.5),
    jitter_width = 0.75,
    cis = TRUE,
    violins = FALSE
  ) +
  annotate(
    "text",
    x = 1.5,
    y = 175,
    label = paste0(
      "Wilcoxon, W = ",
      alpha_richness_stats$statistic,
      ", p = ",
      alpha_richness_stats$p
    ),
    size = 8
  ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 30, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 25, color = "black"),
    plot.margin = margin(
      # Top margin
      t = 5,
      # Right margin
      r = 5,
      # Bottom margin
      b = 5,
      # Left margin
      l = 5
      )
  ) +
  ylab("Alpha Diversity\n(Species Richness)\n") +
  scale_fill_manual(values = pirate_colors) +
  scale_color_manual(values = pirate_colors) +
  scale_y_continuous(breaks = seq(0, 175, 25))


# Figure 1B: Middle Panel
gg_alpha_richness + ggtitle("Figure 1B: Middle Panel")

pdf("./Results/Pirate_Richness_train.pdf",
  height = 6,
  width = 7
)
gg_alpha_richness
invisible(dev.off())

#### Species Evenness START ####
mat_evenness <- mat_filt
row.names(mat_evenness) <- mat_evenness$shotgunSeq_id
mat_evenness <- mat_evenness %>% select(-shotgunSeq_id)
mat_evenness_t <- mat_evenness %>% t()
h <- vegan::diversity(mat_evenness)
s <- vegan::specnumber(mat_filt)
alpha_evenness <- h / log(s)
alpha_evenness <- as.data.frame(alpha_evenness)
colnames(alpha_evenness)[1] <- "Evenness"
alpha_evenness$shotgunSeq_id <- row.names(alpha_evenness)

# Obtain values for mean alph for Survivor and Non-Survivor
alpha_evenness %>%
  left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
  group_by(thirtyday_mortality_overall) %>%
  summarise(mean = mean(Evenness))
## # A tibble: 2 × 2
##   thirtyday_mortality_overall  mean
##   <fct>                       <dbl>
## 1 Survivor                    0.553
## 2 Non-Survivor                0.506
# Obtain stats for alpha diversity
alpha_evenness_stats <-
  alpha_evenness %>%
  left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
  mutate(
    thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
    thirtyday_mortality_overall = factor(
      thirtyday_mortality_overall,
      levels = c("Survivor", "Non-Survivor")
    )
  ) %>%
  rstatix::wilcox_test(Evenness ~ thirtyday_mortality_overall)

set.seed(456)
gg_alpha_evenness <- alpha_evenness %>%
  left_join(micu_new_nocovid_oc %>% select(shotgunSeq_id, thirtyday_mortality_overall)) %>%
  mutate(
    thirtyday_mortality_overall = as.factor(thirtyday_mortality_overall),
    thirtyday_mortality_overall = factor(
      thirtyday_mortality_overall,
      levels = c("Survivor", "Non-Survivor")
    )
  ) %>%
  ggplot(
    .,
    aes(
      x = thirtyday_mortality_overall,
      y = Evenness,
      colour = thirtyday_mortality_overall,
      fill = thirtyday_mortality_overall
    )
  ) +
  geom_pirate(
    cis_params = list(fill = "white", alpha = 0.5),
    bars_params = list(alpha = 0.65),
    lines_params = list(size = 0.5),
    points_params = list(fill = "black", size = 3.5),
    jitter_width = 0.75,
    cis = TRUE,
    violins = FALSE
  ) +
  annotate(
    "text",
    x = 1.5,
    y = 1,
    label = paste0(
      "Wilcoxon, W = ",
      alpha_evenness_stats$statistic,
      ", p = ",
      alpha_evenness_stats$p
    ),
    size = 8
  ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 30, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 25, color = "black"),
    plot.margin = margin(
      # Top margin
      t = 5,
      # Right margin
      r = 5,
      # Bottom margin
      b = 5,
      # Left margin
      l = 5
      ) 
  ) +
  ylab("Alpha Diversity\n(Species Evenness)\n") +
  scale_fill_manual(values = pirate_colors) +
  scale_color_manual(values = pirate_colors) +
  scale_y_continuous(breaks = seq(0, 1, 0.1))


# Figure 1B: Right Panel
gg_alpha_evenness + ggtitle("Figure 1B: Right Panel")

pdf("./Results/Pirate_Evenness_train.pdf",
  height = 6,
  width = 7
)
gg_alpha_evenness
invisible(dev.off())

Delta SOFA Score Correlation with MD Score

delta_stool_sofa <- micu_new_nocovid_oc %>%
  select(metabolomicsID, dSOFA_admission, dSOFA_stool) %>%
  left_join(km_nocovid_final %>% select(metabolomicsID, thirtyday_mortality_overall, md_score)) %>%
  mutate(
    md_score = as.numeric(md_score),
    dSOFA_admission = as.numeric(dSOFA_admission)
  ) %>%
  drop_na(thirtyday_mortality_overall)

# Delta SOFA Stool
ggscatter(
  delta_stool_sofa,
  y = "dSOFA_stool",
  x = "md_score",
  size = 3,
  alpha = 0.2,
  palette = "jco",
  add = "reg.line"
) +
  stat_cor(
    method = "spearman"
  )

ggsave(
  filename = "./Results/delta_SOFA_stool_MDS_train.pdf",
  height = 8,
  width = 8,
  units = "in"
)

# Delta SOFA Stool
ggscatter(
  delta_stool_sofa,
  y = "dSOFA_admission",
  x = "md_score",
  size = 3,
  alpha = 0.2,
  palette = "jco",
  add = "reg.line"
) +
  stat_cor(
    method = "spearman"
  )

ggsave(
  filename = "./Results/delta_SOFA_admission_MDS_train.pdf",
  height = 8,
  width = 8,
  units = "in"
)

Diversity, MD Score Patient List

shannon_mmp_list <- micu_new_nocovid_oc %>%
  select(shotgunSeq_id, metabolomicsID) %>%
  right_join(km_nocovid_final %>% select(metabolomicsID, thirtyday_mortality_overall, md_score)) %>%
  left_join(cutpoints_results_var_slct_shannon) %>%
  mutate(
    High_Shannon = ifelse(
      Shannon >= coordinates_shannon$threshold,
      "High Diversity",
      "Low Diversity"
    ),
    HS_LMDS = ifelse(
      Shannon >= coordinates_shannon$threshold &
        md_score < coordinates_mds$threshold,
      1,
      0
    ),
    HS_HMDS = ifelse(
      Shannon >= coordinates_shannon$threshold &
        md_score >= coordinates_mds$threshold,
      1,
      0
    ),
    LS_LMDS = ifelse(
      Shannon < coordinates_shannon$threshold &
        md_score < coordinates_mds$threshold,
      1,
      0
    ),
    LS_HMDS = ifelse(
      Shannon < coordinates_shannon$threshold &
        md_score >= coordinates_mds$threshold,
      1,
      0
    )
  )

write.csv(shannon_mmp_list,
  "./Results/shannon_mds_list_train.csv",
  row.names = FALSE
)

# Summary
shannon_mmp_list %>%
  select(HS_LMDS:LS_HMDS) %>%
  colSums() %>%
  as.data.frame() %>%
  rownames_to_column(var = "measure") %>%
  dplyr::rename(count = ".") %>%
  mutate(
    total = sum(count),
    percent = (count / total) * 100
  ) %>%
  write.csv(., "./Results/shannon_mds_summary_train.csv", row.names = FALSE)

First Sample Distribution

# Make dataframe of first samples and their day_collected
first_samp_dist <- first_samp_list_anon %>%
  right_join(micu_new_nocovid_oc %>%
    select(unique_id, thirtyday_mortality_overall)) %>%
  select(thirtyday_mortality_overall, day_collected)

# Run Kolmogorov-Smirnov test to compare distributions of day_collected between Survivor and Non-Survivor
first_samp_test <-
  ks.test(
    first_samp_dist %>% filter(thirtyday_mortality_overall == "Survivor") %>% pull(day_collected),
    first_samp_dist %>% filter(thirtyday_mortality_overall != "Survivor") %>% pull(day_collected)
  )

first_samp_test
## 
##  Exact two-sample Kolmogorov-Smirnov test
## 
## data:  first_samp_dist %>% filter(thirtyday_mortality_overall == "Survivor") %>% pull(day_collected) and first_samp_dist %>% filter(thirtyday_mortality_overall != "Survivor") %>% pull(day_collected)
## D = 0.14118, p-value = 0.2367
## alternative hypothesis: two-sided
# D = 0.14 and p = 0.24, therefore the samples from both "Survivor" and "Non-Survivor" come from the same distribution

first_samp_dist %>%
  ggpubr::gghistogram(
    x = "day_collected",
    fill = "thirtyday_mortality_overall",
    color = "thirtyday_mortality_overall",
    binwidth = 1,
    palette = c(
      ggsci::pal_lancet(palette = "lanonc")(2)[1],
      ggsci::pal_lancet(palette = "lanonc")(2)[2]
    ),
    alpha = 0.3
  ) +
  annotate(
    "text",
    x = 15,
    y = 15,
    label = paste0(
      "Kolmogorov-Smirnov; D(195) = ",
      round(first_samp_test[1][[1]], 3),
      " p = ",
      round(first_samp_test[2][[1]], 3)
    )
  ) +
  guides(
    fill = guide_legend("Outcome"),
    color = guide_legend("Outcome")
  ) +
  ylab("Count\n") +
  xlab("\nDay of First Sample Collection")

ggsave(
  "./Results/first_samp_distribution_train.pdf",
  height = 6,
  width = 8,
  units = "in"
)

Microbiome Metabolomic Profile

# MMP Score
mmp_df <- cutpoints_df %>% 
  filter(compound %in% c("deoxycholic acid", 
                         "isodeoxycholic acid", 
                         "lithocholic acid", 
                         "desaminotyrosine")) %>% 
  mutate(cutpoint_prediction = case_when(
      compound == "deoxycholic acid" &
        mvalue__mM >= (89.92/1000) ~ 0,
      compound == "isodeoxycholic acid" &
        mvalue__mM >= (0.97/1000) ~ 0,
      compound == "lithocholic acid" &
        mvalue__mM >= (258.25/1000) ~ 0,
      compound == "desaminotyrosine" &
        mvalue__mM >= (21.31/1000) ~ 0,
      TRUE ~ 1
    )) %>% 
  group_by(metabolomicsID, thirtyday_mortality_overall) %>%
  summarize(mmp_score = sum(cutpoint_prediction)) %>% 
  ungroup() %>% 
  mutate(grouped_mmp_score = ifelse(mmp_score >= 2, "High MMP", "Low MMP"))

  
# Boxplot of MD Score
mmp_chis <-
  stats::chisq.test(
    mmp_df$thirtyday_mortality_overall,
    mmp_df$grouped_mmp_score
  )

mmp_violin <-
  ggviolin(
    mmp_df,
    x = "thirtyday_mortality_overall",
    y = "mmp_score",
    fill = "thirtyday_mortality_overall",
    palette = "lancet",
    add = c("dotplot"),
    add.params = list(binwidth = 0.05)
  ) +
  annotate(
    "text",
    x = 1.5,
    y = 12,
    label = paste0(
      "Chisq",
      "(",
      round(mmp_chis$statistic, 3),
      "),",
      " p =",
      scales::scientific(mmp_chis$p.value)
    )
  ) +
  annotate(
    "segment",
    x = 1,
    xend = 2,
    y = 11.35,
    yend = 11.35
  ) +
  annotate(
    "segment",
    x = 1,
    xend = 1,
    y = 11.25,
    yend = 11.35
  ) +
  annotate(
    "segment",
    x = 2,
    xend = 2,
    y = 11.25,
    yend = 11.35
  ) +
  ylab("Microbiome Metabolomic Profile\n") +
  xlab("") +
  guides(fill = guide_legend("30 Day Mortality"))

mmp_violin

ggsave(
  plot = mmp_violin,
  filename = "./Results/MMP_Violin_train.pdf",
  height = 6,
  width = 8
)


# gg_mmp_chi <- gginference::ggchisqtest(mmp_chis, colaccept = "green3", colreject = "red3") # It is highly unlikely that our test statistic would be observed if there were no association between survival outcome and the md score
# gg_mmp_chi

# Confusion matrix for MMP Score

mmp_df2 <- mmp_df %>% 
  mutate(prediction = ifelse(grouped_mmp_score == "Low MMP", "Survivor", "Non-Survivor"))

caret::confusionMatrix(table(
  factor(mmp_df2$prediction,
         levels = c("Survivor", "Non-Survivor")),
  factor(
    mmp_df2$thirtyday_mortality_overall,
    levels = c("Survivor", "Non-Survivor")
  )
))
## Confusion Matrix and Statistics
## 
##               
##                Survivor Non-Survivor
##   Survivor           42            8
##   Non-Survivor       60           37
##                                           
##                Accuracy : 0.5374          
##                  95% CI : (0.4534, 0.6199)
##     No Information Rate : 0.6939          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1769          
##                                           
##  Mcnemar's Test P-Value : 6.224e-10       
##                                           
##             Sensitivity : 0.4118          
##             Specificity : 0.8222          
##          Pos Pred Value : 0.8400          
##          Neg Pred Value : 0.3814          
##              Prevalence : 0.6939          
##          Detection Rate : 0.2857          
##    Detection Prevalence : 0.3401          
##       Balanced Accuracy : 0.6170          
##                                           
##        'Positive' Class : Survivor        
## 
# Confusion Matrix and Statistics
# 
#               
#                Survivor Non-Survivor
#   Survivor           42            8
#   Non-Survivor       60           37
#                                           
#                Accuracy : 0.5374          
#                  95% CI : (0.4534, 0.6199)
            # Sensitivity : 0.4118          
            # Specificity : 0.8222  

# Export confusion matrix
bind_rows(
  as.data.frame(as.table(caret::confusionMatrix(
  table(
    factor(mmp_df2$prediction,
           levels = c("Survivor", "Non-Survivor")),
    factor(
      mmp_df2$thirtyday_mortality_overall,
      levels = c("Survivor", "Non-Survivor")
    )
  )
))) %>% dplyr::rename(Prediction = Var1,
                      Actual = Var2),

  as.data.frame(as.matrix(caret::confusionMatrix(
  table(
    factor(mmp_df2$prediction,
           levels = c("Survivor", "Non-Survivor")),
    factor(
      mmp_df2$thirtyday_mortality_overall,
      levels = c("Survivor", "Non-Survivor")
    )
  )
),
what = "overall")),

as.data.frame(as.matrix(caret::confusionMatrix(
  table(
    factor(mmp_df2$prediction,
           levels = c("Survivor", "Non-Survivor")),
    factor(
      mmp_df2$thirtyday_mortality_overall,
      levels = c("Survivor", "Non-Survivor")
    )
  )
),
what = "classes"))) %>% write.csv("./Results/MMP_Confusion_Matrix_Data.csv")

Kaplan-Meier Survival Analysis

km_mmp <- micu_new_nocovid_oc %>%
  select(
    unique_id,
    sampleid,
    metabolomicsID,
    days_until_death_overall,
    censoring_thirtyday_mortality_overall,
    thirtyday_mortality_overall
  ) %>%
  ungroup() %>%
  mutate(
    surv_days = ifelse(
      is.na(days_until_death_overall) &
        thirtyday_mortality_overall == "Survivor",
      censoring_thirtyday_mortality_overall,
      days_until_death_overall
    ),
    surv_days = ifelse(
      is.na(surv_days) &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    surv_days = ifelse(
      surv_days > 30 &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
  ) %>% # Non-Survivor is 1
  left_join(mmp_df)
  

# KM Curves: MD Score
set.seed(123)
surv_object_mmp <-
  Surv(
    time = km_mmp$surv_days,
    event = km_mmp$thirtyday_mortality_overall_class
  )

fit_mmp <- survfit(surv_object ~ grouped_mmp_score, data = km_mmp)

ggs_mmp <- ggsurvplot(
  fit_mmp,
  data = km_mmp,
  size = 1,
  palette = c("#C45258", "#2F4858"),
  xlab = "Days from Admission",
  conf.int = TRUE,
  pval = TRUE,
  risk.table = "abs_pct",
  legend = "bottom",
  risk.table.height = 0.4,
  risk.table.y.text.col = TRUE,
  tables.y.text = FALSE,
  risk.table.fontsize = 2.8,
  pval.size = 3.5,
  ggtheme = theme_test() + theme(
    panel.grid.major = el(linewidth = 0.5, color = "gray90"),
    axis.text.y = et(color = "black", size = 10),
    axis.title.y = et(color = "black")
  ),
  legend.labs = c("High MMP Score", "Low MMP Score")
)

# Change table axis labels
ggs_mmp$table <-
  ggs_mmp$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table

ggs_mmp

pdf(
  "./Results/kaplan_meier_MMP_30_Day_Mortality_train.pdf",
  height = 4,
  width = 6,
  onefile = FALSE
)
ggs_mmp
invisible(dev.off())

Cox Proportional Hazards Regression Analysis

# Variables labels
cox_df_mmp <- tableone_nocovid_df_filt %>%
  labelled::remove_labels() %>%
  janitor::clean_names() %>%
  mutate(
    race_factor = as.character(race_factor),
    race_factor = ifelse(
      race_factor %in% c("Asian", "More than one race", "White, Hispanic"),
      "Other",
      race_factor
    )
  ) %>%
  left_join(
    micu_nocovid_first_samps_omics_light %>%
      group_by(metabolomicsID) %>%
      slice(1) %>%
      select(unique_id, metabolomicsID)
  ) %>%
  left_join(km_mmp %>% select(metabolomicsID, mmp_score)) %>%
  right_join(
    micu_new_nocovid_oc %>% select(
      unique_id,
      days_until_death_overall,
      censoring_thirtyday_mortality_overall,
      thirtyday_mortality_overall
    )
  ) %>%
  mutate(
    surv_days = ifelse(
      is.na(days_until_death_overall) &
        thirtyday_mortality_overall == "Survivor",
      censoring_thirtyday_mortality_overall,
      days_until_death_overall
    ),
    surv_days = ifelse(
      is.na(surv_days) &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    surv_days = ifelse(
      surv_days > 30 &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)) %>% 
  dplyr::rename(`Charlson Comorbidity Index` = cci_total_sc) %>%
  mutate(diet = ifelse(diet == "1", "Diet", "NPO")) %>% 
  dplyr::rename(
    `Sex` = "sex_factor",
    `Age` = "age",
    `Acute respiratory distress syndrome` = "ards_factor",
    `Sepsis` = "sepsis_factor",
    `SOFA Score` = "sofa_score_total",
    `Race` = "race_factor",
    `Time to stool sample` = "day_collected",
    `Diet` = "diet",
    `MMP` = "mmp_score",
  )


reset_gtsummary_theme()

coxauc_mmp <-
  coxph(
    Surv(cox_df_mmp$surv_days, cox_df_mmp$thirtyday_mortality_overall_class) ~
      `Sex` +
      `Age` +
      `Charlson Comorbidity Index` +
      `Acute respiratory distress syndrome` +
      `Sepsis` +
      `SOFA Score` +
      `Race` +
      `Time to stool sample` +
      `Diet` +
      `MMP`,
    data = cox_df_mmp
  ) %>%
  tbl_regression(
    exp = TRUE,
    pvalue_fun = function(x) {
      if_else(is.na(x), NA_character_, if_else(
        x < 0.001,
        format(x,
          digits = 3, scientific = TRUE
        ),
        format(round(x, 3),
          scientific = F
        )
      ))
    }
  ) %>%
  modify_footnote(everything() ~ NA, abbreviation = TRUE)


coxauc_mmp %>%
  gtsummary::modify_caption("**Cox Proportional Hazards Regression**")
Cox Proportional Hazards Regression
Characteristic HR 95% CI p-value
Sex
    Female
    Male 1.26 0.64, 2.46 0.498
Age 1.00 0.97, 1.02 0.754
Charlson Comorbidity Index 1.14 0.99, 1.31 0.079
Acute respiratory distress syndrome
    No
    Yes 2.74 1.28, 5.88 0.010
Sepsis
    None
    Sepsis 1.76 0.68, 4.60 0.246
SOFA Score 1.04 0.96, 1.13 0.339
Race
    African American
    Other 1.99 0.52, 7.62 0.314
    White, non-Hispanic 2.50 1.18, 5.29 0.017
Time to stool sample 1.01 0.91, 1.11 0.887
Diet
    Diet
    NPO 1.79 0.83, 3.84 0.135
MMP 1.22 0.95, 1.57 0.127
# In case you get an error: "Error in s$close() : attempt to apply non-function", run this code below:
# f <- chromote::default_chromote_object() #get the f object
# f$close()

gt::gtsave(gtsummary::as_gt(coxauc_mmp), file = "./Results/cox_model_MMP_30_Day_Mortality_train.png")

Validation Cohort

Validation Cohort Analysis

MD Score

# Cutpoint dataframe
cutpoints_df_vc <- metab_quant_imp_tot_mM %>%
  pivot_wider(
    id_cols = c(metabolomicsID),
    names_from = "compound",
    values_from = "mvalue__mM"
  ) %>%
  group_by(metabolomicsID) %>%
  pivot_longer(!c(metabolomicsID),
    names_to = "compound",
    values_to = "mvalue__mM"
  ) %>%
  right_join(micu_new_nocovid_vc %>% select(metabolomicsID, thirtyday_mortality_overall)) %>%
  group_by(compound) %>%
  mutate(n = length(compound)) %>%
  ungroup() %>%
  mutate(p = length(unique(metabolomicsID))) %>%
  mutate(
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
  ) %>%
  drop_na(compound)

cutpoints_results_var_slct_vc <-
  cutpoints_df_vc %>%
  filter(
    compound %in% c(
      as.matrix(coef(cutpoint_best_ridge)) %>%
        as.data.frame() %>%
        rownames_to_column(var = "compound") %>%
        filter(compound != "(Intercept)") %>%
        arrange(desc(abs(s0))) %>%
        dplyr::slice(1:optimal_components) %>%
        pull(compound)
    )
  ) %>%
  left_join(
    cutpoints_unnest %>%
      dplyr::rename(compound = subgroup) %>%
      select(compound, direction, optimal_cutpoint)
  ) %>%
  mutate(
    cutpoint_prediction = case_when(
      direction == "<=" & mvalue__mM <= optimal_cutpoint ~ 1,
      direction == "<=" & mvalue__mM > optimal_cutpoint ~ 0,
      direction == ">=" & mvalue__mM >= optimal_cutpoint ~ 1,
      direction == ">=" & mvalue__mM < optimal_cutpoint ~ 0
    )
  ) %>%
  group_by(metabolomicsID, thirtyday_mortality_overall) %>%
  summarize(md_score = sum(cutpoint_prediction)) %>%
  mutate(
    grouped_md_score = ifelse(
      md_score >= coordinates_mds$threshold,
      "High Score",
      "Low Score"
    ),
    prediction = ifelse(grouped_md_score == "Low Score", "Survivor", "Non-Survivor")
  )

caret::confusionMatrix(table(
  factor(
    cutpoints_results_var_slct_vc$prediction,
    levels = c("Survivor", "Non-Survivor")
  ),
  factor(
    cutpoints_results_var_slct_vc$thirtyday_mortality_overall,
    levels = c("Survivor", "Non-Survivor")
  )
))
## Confusion Matrix and Statistics
## 
##               
##                Survivor Non-Survivor
##   Survivor           28            8
##   Non-Survivor        6            7
##                                           
##                Accuracy : 0.7143          
##                  95% CI : (0.5674, 0.8342)
##     No Information Rate : 0.6939          
##     P-Value [Acc > NIR] : 0.4464          
##                                           
##                   Kappa : 0.3014          
##                                           
##  Mcnemar's Test P-Value : 0.7893          
##                                           
##             Sensitivity : 0.8235          
##             Specificity : 0.4667          
##          Pos Pred Value : 0.7778          
##          Neg Pred Value : 0.5385          
##              Prevalence : 0.6939          
##          Detection Rate : 0.5714          
##    Detection Prevalence : 0.7347          
##       Balanced Accuracy : 0.6451          
##                                           
##        'Positive' Class : Survivor        
## 
  #              Survivor Non-Survivor
  # Survivor           28            6
  # Non-Survivor        6            9

Kaplan-Meier Survival Analysis

km_nocovid_vc <- micu_new_nocovid_vc %>%
  select(
    unique_id,
    sampleid,
    metabolomicsID,
    days_until_death_overall,
    censoring_thirtyday_mortality_overall,
    thirtyday_mortality_overall
  ) %>%
  ungroup() %>%
  mutate(
    surv_days = ifelse(
      is.na(days_until_death_overall) &
        thirtyday_mortality_overall == "Survivor",
      censoring_thirtyday_mortality_overall,
      days_until_death_overall
    ),
    surv_days = ifelse(
      is.na(surv_days) &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    surv_days = ifelse(
      surv_days > 30 &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
  ) %>% # Non-Survivor is 1
  left_join(cutpoints_results_var_slct_vc) 

# KM Curves: MD Score
set.seed(123)
surv_object_vc <-
  Surv(
    time = km_nocovid_vc$surv_days,
    event = km_nocovid_vc$thirtyday_mortality_overall_class
  )

fit_vc <-
  survfit(surv_object_vc ~ grouped_md_score, data = km_nocovid_vc)

ggs_vc <- ggsurvplot(
  fit_vc,
  data = km_nocovid_vc,
  size = 1,
  palette = c("#C45258", "#2F4858"),
  xlab = "Days from Admission",
  conf.int = TRUE,
  pval = TRUE,
  risk.table = "abs_pct",
  legend = "bottom",
  risk.table.height = 0.4,
  risk.table.y.text.col = TRUE,
  tables.y.text = FALSE,
  risk.table.fontsize = 2.8,
  pval.size = 3.5,
  ggtheme = theme_test() + theme(
    panel.grid.major = el(linewidth = 0.5, color = "gray90"),
    axis.text.y = et(color = "black", size = 10),
    axis.title.y = et(color = "black")
  ),
  legend.labs = c("High MD Score", "Low MD Score")
)

# Change table axis labels
ggs_vc$table <-
  ggs_vc$table + labs(x = NULL, y = NULL) + theme(plot.title = eb()) # risk table

ggs_vc

pdf(
  "./Results/kaplan_meier_roc_loop_30_Day_Mortality_validation.pdf",
  height = 4,
  width = 6,
  onefile = FALSE
)
ggs_vc
invisible(dev.off())

# Boxplot of MD Score
mds_chis_vc <-
  stats::chisq.test(
    km_nocovid_vc$thirtyday_mortality_overall,
    km_nocovid_vc$md_score
  )

md_violin_vc <-
  ggviolin(
    km_nocovid_vc,
    x = "thirtyday_mortality_overall",
    y = "md_score",
    fill = "thirtyday_mortality_overall",
    palette = "lancet",
    add = c("dotplot"),
    add.params = list(binwidth = 0.05)
  ) +
  annotate(
    "text",
    x = 1.5,
    y = 12,
    label = paste0(
      "Chisq",
      "(",
      round(mds_chis_vc$statistic, 3),
      "),",
      " p =",
      scales::scientific(mds_chis_vc$p.value)
    )
  ) +
  annotate(
    "segment",
    x = 1,
    xend = 2,
    y = 11.35,
    yend = 11.35
  ) +
  annotate(
    "segment",
    x = 1,
    xend = 1,
    y = 11.25,
    yend = 11.35
  ) +
  annotate(
    "segment",
    x = 2,
    xend = 2,
    y = 11.25,
    yend = 11.35
  ) +
  ylab("Metabolic Dysbiosis Score\n") +
  xlab("") +
  guides(fill = guide_legend("30 Day Mortality"))

md_violin_vc

ggsave(
  plot = md_violin_vc,
  filename = "./Results/MDS_Violin_validation.pdf",
  height = 6,
  width = 8
)

# gg_mds_chi_vc <- gginference::ggchisqtest(mds_chis_vc, colaccept = "green3", colreject = "red3") # It is highly unlikely that our test statistic would be observed if there were no association between survival outcome and the md score
# gg_mds_chi_vc

Cox Proportional Hazards Regression Analysis

# Validation cohort 
cri_rxmar_abx_long_vc <- readRDS("./Data/cri_rxmar_abx_long_vc.rds")

tableone_nocovid_df_vc <-
  micu_new_nocovid_vc %>%
  left_join(cri_rxmar_abx_long_vc, by = "unique_id") %>% 
  mutate(across(Cephalosporins:Quinolones, ~ str_to_title(.))) %>% 
  mutate(across(Cephalosporins:Quinolones, ~ replace_na(., "Unchecked"))) %>%
  mutate(across(Cephalosporins:Quinolones, ~ as.factor(.))) %>% 
  mutate(across(Cephalosporins:Quinolones, ~ factor(., levels = c("Unchecked", "Checked")))) %>% 
  mutate(across(Hypertension:Tuberculosis, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
  mutate(across(Acute.respiratory.distress.syndrome:Newly.diagnosed.solid.malignancy, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
  mutate(across(Myocardial.infract:AIDS, ~ factor(., levels = c("Unchecked", "Checked")))) %>%
  select(
    age,
    sex.factor,
    bmi,
    race.factor,
    cci_total_sc,
    thirtyday_mortality_overall,
    primary_dx.factor,
    ards.factor,
    sepsis.factor,
    admit_from.factor,
    COVID_upon_admission,
    sofa_score_total,
    ap2_total_score,
    reason_for_intubation.factor,
    reintub_1.factor,
    reintub_2.factor,
    total_ventilator_days,
    icu_los_total,
    hospital_los,
    day_collected,
    Hypertension:`Neuromuscular.disorder`,
    `Peptic.ulcer.disease`,
    `Thyroid.disease`:Tuberculosis,
    `Bacterial.pneumonia`:`Newly.diagnosed.solid.malignancy`,
    `Myocardial.infract`:`AIDS`,
    Penicillins,
    Cephalosporins,
    Carbapenems,
    Vancomycin,
    Metronidazole,
    Macrolides,
    Quinolones,
    other,
    Clindamycin,
    Aminoglycosides,
    Doxycycline,
    `Trimethoprim-Sulfamethoxazole`,
    Rifaximin,
    `diet`,
    dSOFA_admission, dSOFA_stool
  ) %>%
  janitor::clean_names() %>%
  select(-c(
    hypertension:tuberculosis,
    reason_for_intubation_factor:hospital_los
  )) %>%
  replace_na(list(reason_for_intubation_factor = "Not intubated")) %>%
  droplevels()

tableone_nocovid_vc <- CreateTableOne(
  data = tableone_nocovid_df_vc,
  strata = "thirtyday_mortality_overall",
  includeNA = TRUE
)

summary(tableone_nocovid_vc)
## 
##      ### Summary of continuous variables ###
## 
## thirtyday_mortality_overall: Survivor
##                   n miss p.miss mean sd median p25 p75 min max  skew kurt
## age              34    0      0 60.2 16     64  54  71  23  88 -0.68 -0.2
## bmi              34    0      0 31.4 12     28  24  36  14  62  1.00  0.6
## cci_total_sc     34    0      0  4.8  3      5   3   7   0  10  0.08 -0.5
## sofa_score_total 34    0      0  7.4  4      8   4  10   0  16  0.37 -0.6
## ap2_total_score  34    0      0 23.0  8     22  16  29  10  40  0.36 -0.8
## day_collected    34    0      0  4.3  5      3   1   5   0  22  2.21  4.7
## d_sofa_admission 34    0      0  0.7  3      1  -1   3  -5   6 -0.22 -0.4
## d_sofa_stool     34    1      3  0.6  2      0   0   1  -3   6  1.04  1.6
## ------------------------------------------------------------ 
## thirtyday_mortality_overall: Non-Survivor
##                   n miss p.miss mean sd median p25 p75 min max skew kurt
## age              15    0      0 63.5 11     62  58  72  38  84 -0.3  0.8
## bmi              15    0      0 32.5 10     32  26  37  17  55  0.5  0.5
## cci_total_sc     15    0      0  6.3  3      6   5   8   3  13  1.2  2.6
## sofa_score_total 15    0      0 10.9  4     11   8  14   4  17 -0.3 -0.7
## ap2_total_score  15    0      0 27.8  9     29  24  30   8  43 -0.2  1.5
## day_collected    15    0      0  2.1  2      1   1   3   0   7  1.7  3.7
## d_sofa_admission 15    0      0  0.1  3     -1  -2   2  -4   7  0.9  0.8
## d_sofa_stool     15    1      7 -1.1  4      0  -3   0  -8   6 -0.2  0.7
## 
## p-values
##                      pNormal pNonNormal
## age              0.475119103 0.64069874
## bmi              0.748593177 0.52928469
## cci_total_sc     0.067349490 0.09170164
## sofa_score_total 0.009810838 0.01194707
## ap2_total_score  0.070124034 0.04923691
## day_collected    0.112836057 0.12578808
## d_sofa_admission 0.487735424 0.33119621
## d_sofa_stool     0.032403839 0.07463793
## 
## Standardize mean differences
##                     1 vs 2
## age              0.2382830
## bmi              0.1028927
## cci_total_sc     0.5815343
## sofa_score_total 0.8375881
## ap2_total_score  0.5698122
## day_collected    0.5761683
## d_sofa_admission 0.2145763
## d_sofa_stool     0.6171648
## 
## =======================================================================================
## 
##      ### Summary of categorical variables ### 
## 
## thirtyday_mortality_overall: Survivor
##                                            var  n miss p.miss
##                                     sex_factor 34    0    0.0
##                                                              
##                                                              
##                                    race_factor 34    0    0.0
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                    thirtyday_mortality_overall 34    0    0.0
##                                                              
##                                                              
##                              primary_dx_factor 34    0    0.0
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                    ards_factor 34    0    0.0
##                                                              
##                                                              
##                                  sepsis_factor 34    0    0.0
##                                                              
##                                                              
##                              admit_from_factor 34    0    0.0
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                           covid_upon_admission 34    0    0.0
##                                                              
##                            bacterial_pneumonia 34    0    0.0
##                                                              
##                                                              
##                               fungal_pneumonia 34    0    0.0
##                                                              
##                                viral_pneumonia 34    0    0.0
##                                                              
##   chronic_obstructive_pulmonary_disease_copd_1 34    0    0.0
##                                                              
##                                                              
##                            asthma_exacerbation 34    0    0.0
##                                                              
##                            lung_lobar_collapse 34    0    0.0
##                                                              
##                                                              
##                             pulmonary_embolism 34    0    0.0
##                                                              
##                                                              
##                                     hemoptysis 34    0    0.0
##                                                              
##                                   pancreatitis 34    0    0.0
##                                                              
##                 infection_genitourinary_system 34    0    0.0
##                                                              
##                                                              
##                      infection_intra_abdominal 34    0    0.0
##                                                              
##                                                              
##                          infection_soft_tissue 34    0    0.0
##                                                              
##                                                              
##                                  infection_cns 34    0    0.0
##                                                              
##                                                              
##               hepatic_failure_acute_fullminant 34    0    0.0
##                                                              
##               hepatic_failure_acute_on_chronic 34    0    0.0
##                                                              
##                                                              
##                          diabetic_ketoacidosis 34    0    0.0
##                                                              
##                                                              
##                                 acute_leukemia 34    0    0.0
##                                                              
##                   cerebral_vascular_accident_1 34    0    0.0
##                                                              
##                                                              
##       acute_myocardial_infarction_nstemi_stemi 34    0    0.0
##                                                              
##                    diffuse_alveolar_hemorrhage 34    0    0.0
##                                                              
##   decompensated_heart_failure_pulmonary_oedema 34    0    0.0
##                                                              
##                                                              
##                               pleural_effusion 34    0    0.0
##                                                              
##                                                              
##         interstitial_lung_disease_exacerbation 34    0    0.0
##                                                              
##                                                              
##                           organizing_pneumonia 34    0    0.0
##                                                              
##                  acute_eosinophilic_pneumoniae 34    0    0.0
##                                                              
##                                          other 34    0    0.0
##                                                              
##                                                              
##                                     angioedema 34    0    0.0
##                                                              
##                                                              
##                            acute_renal_failure 34    0    0.0
##                                                              
##                                                              
##                          altered_mental_status 34    0    0.0
##                                                              
##                                                              
##                           hypertensive_urgency 34    0    0.0
##                                                              
##                                                              
##                         hypertensive_emergency 34    0    0.0
##                                                              
##                                                              
##                                   endocarditis 34    0    0.0
##                                                              
##                                                              
##                                     bacteremia 34    0    0.0
##                                                              
##                                                              
##                      gastrointestinal_bleeding 34    0    0.0
##                                                              
##                              hemorrhagic_shock 34    0    0.0
##                                                              
##                                     aspiration 34    0    0.0
##                                                              
##                                                              
##  central_line_associated_blood_steam_infection 34    0    0.0
##                                                              
##                     prosthetic_joint_infection 34    0    0.0
##                                                              
##                  new_onset_atrial_fibrillation 34    0    0.0
##                                                              
##               newly_diagnosed_solid_malignancy 34    0    0.0
##                                                              
##                                                              
##                             myocardial_infract 34    0    0.0
##                                                              
##                                                              
##                       congestive_heart_failure 34    0    0.0
##                                                              
##                                                              
##                peripheral_vascular_disease_cci 34    0    0.0
##                                                              
##                                                              
##                        cerebrovascular_disease 34    0    0.0
##                                                              
##                                                              
##                                       dementia 34    0    0.0
##                                                              
##                      chronic_pulmonary_disease 34    0    0.0
##                                                              
##                                                              
##                    connective_tissue_disease_1 34    0    0.0
##                                                              
##                                                              
##                                  ulcer_disease 34    0    0.0
##                                                              
##                                                              
##                             mild_liver_disease 34    0    0.0
##                                                              
##                                                              
##                 diabetes_without_complications 34    0    0.0
##                                                              
##                                                              
##                 diabetes_with_end_organ_damage 34    0    0.0
##                                                              
##                                                              
##                                     hemiplegia 34    0    0.0
##                                                              
##                                                              
##               moderate_or_severe_renal_disease 34    0    0.0
##                                                              
##                                                              
##                     solid_tumor_non_metastatic 34    0    0.0
##                                                              
##                                                              
##                                       leukemia 34    0    0.0
##                                                              
##                                                              
##                                        lymhoma 34    0    0.0
##                                                              
##                                                              
##               moderate_or_severe_liver_disease 34    0    0.0
##                                                              
##                                                              
##                         metastatic_solid_tumor 34    0    0.0
##                                                              
##                                                              
##                                           aids 34    0    0.0
##                                                              
##                                                              
##                                    penicillins 34    0    0.0
##                                                              
##                                                              
##                                 cephalosporins 34    0    0.0
##                                                              
##                                                              
##                                    carbapenems 34    0    0.0
##                                                              
##                                                              
##                                     vancomycin 34    0    0.0
##                                                              
##                                                              
##                                  metronidazole 34    0    0.0
##                                                              
##                                                              
##                                     macrolides 34    0    0.0
##                                                              
##                                                              
##                                     quinolones 34    0    0.0
##                                                              
##                                                              
##                                        other_2 34    0    0.0
##                                                              
##                                                              
##                                    clindamycin 34    0    0.0
##                                                              
##                                                              
##                                aminoglycosides 34    0    0.0
##                                                              
##                                                              
##                                    doxycycline 34    0    0.0
##                                                              
##                                                              
##                  trimethoprim_sulfamethoxazole 34    0    0.0
##                                                              
##                                                              
##                                      rifaximin 34    0    0.0
##                                                              
##                                                              
##                                           diet 34    0    0.0
##                                                              
##                                                              
##                                   level freq percent cum.percent
##                                  Female   20    58.8        58.8
##                                    Male   14    41.2       100.0
##                                                                 
##                        African American   24    70.6        70.6
##                      More than one race    1     2.9        73.5
##                                   Other    0     0.0        73.5
##                         White, Hispanic    1     2.9        76.5
##                     White, non-Hispanic    8    23.5       100.0
##                                                                 
##                                Survivor   34   100.0       100.0
##                            Non-Survivor    0     0.0       100.0
##                                                                 
##        Acute (on chronic) liver failure    3     8.8         8.8
##                           CNS pathology    0     0.0         8.8
##                           GI hemorrhage    2     5.9        14.7
##              Post-operative observation    1     2.9        17.6
##               Respiratory failure, AHRF   11    32.4        50.0
##  Respiratory failure, airway compromise    3     8.8        58.8
##        Respiratory failure, ventilatory    4    11.8        70.6
##               Sepsis (+/- septic shock)   10    29.4       100.0
##                                                                 
##                                      No   30    88.2        88.2
##                                     Yes    4    11.8       100.0
##                                                                 
##                                    None    9    26.5        26.5
##                                  Sepsis   25    73.5       100.0
##                                                                 
##                              Cardiology    2     5.9         5.9
##                                      ED   13    38.2        44.1
##                        General Medicine    4    11.8        55.9
##                                   Liver    1     2.9        58.8
##                                Oncology    2     5.9        64.7
##                                     OSH    9    26.5        91.2
##                                 Surgery    3     8.8       100.0
##                                                                 
##                                      No   34   100.0       100.0
##                                                                 
##                               Unchecked   25    73.5        73.5
##                                 Checked    9    26.5       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   26    76.5        76.5
##                                 Checked    8    23.5       100.0
##                                                                 
##                               Unchecked   30    88.2        88.2
##                                 Checked    4    11.8       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   28    82.4        82.4
##                                 Checked    6    17.6       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   15    44.1        44.1
##                                 Checked   19    55.9       100.0
##                                                                 
##                               Unchecked   28    82.4        82.4
##                                 Checked    6    17.6       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   26    76.5        76.5
##                                 Checked    8    23.5       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                                                 
##                               Unchecked   21    61.8        61.8
##                                 Checked   13    38.2       100.0
##                                                                 
##                               Unchecked   29    85.3        85.3
##                                 Checked    5    14.7       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   26    76.5        76.5
##                                 Checked    8    23.5       100.0
##                                                                 
##                               Unchecked   31    91.2        91.2
##                                 Checked    3     8.8       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   28    82.4        82.4
##                                 Checked    6    17.6       100.0
##                                                                 
##                               Unchecked   30    88.2        88.2
##                                 Checked    4    11.8       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   30    88.2        88.2
##                                 Checked    4    11.8       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   29    85.3        85.3
##                                 Checked    5    14.7       100.0
##                                                                 
##                               Unchecked   15    44.1        44.1
##                                 Checked   19    55.9       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   19    55.9        55.9
##                                 Checked   15    44.1       100.0
##                                                                 
##                               Unchecked   26    76.5        76.5
##                                 Checked    8    23.5       100.0
##                                                                 
##                               Unchecked   26    76.5        76.5
##                                 Checked    8    23.5       100.0
##                                                                 
##                               Unchecked   33    97.1        97.1
##                                 Checked    1     2.9       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   34   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                               Unchecked   29    85.3        85.3
##                                 Checked    5    14.7       100.0
##                                                                 
##                               Unchecked   32    94.1        94.1
##                                 Checked    2     5.9       100.0
##                                                                 
##                                    diet   24    70.6        70.6
##                                     npo   10    29.4       100.0
##                                                                 
## ------------------------------------------------------------ 
## thirtyday_mortality_overall: Non-Survivor
##                                            var  n miss p.miss
##                                     sex_factor 15    0    0.0
##                                                              
##                                                              
##                                    race_factor 15    0    0.0
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                    thirtyday_mortality_overall 15    0    0.0
##                                                              
##                                                              
##                              primary_dx_factor 15    0    0.0
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                    ards_factor 15    0    0.0
##                                                              
##                                                              
##                                  sepsis_factor 15    0    0.0
##                                                              
##                                                              
##                              admit_from_factor 15    0    0.0
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                                                              
##                           covid_upon_admission 15    0    0.0
##                                                              
##                            bacterial_pneumonia 15    0    0.0
##                                                              
##                                                              
##                               fungal_pneumonia 15    0    0.0
##                                                              
##                                viral_pneumonia 15    0    0.0
##                                                              
##   chronic_obstructive_pulmonary_disease_copd_1 15    0    0.0
##                                                              
##                                                              
##                            asthma_exacerbation 15    0    0.0
##                                                              
##                            lung_lobar_collapse 15    0    0.0
##                                                              
##                                                              
##                             pulmonary_embolism 15    0    0.0
##                                                              
##                                                              
##                                     hemoptysis 15    0    0.0
##                                                              
##                                   pancreatitis 15    0    0.0
##                                                              
##                 infection_genitourinary_system 15    0    0.0
##                                                              
##                                                              
##                      infection_intra_abdominal 15    0    0.0
##                                                              
##                                                              
##                          infection_soft_tissue 15    0    0.0
##                                                              
##                                                              
##                                  infection_cns 15    0    0.0
##                                                              
##                                                              
##               hepatic_failure_acute_fullminant 15    0    0.0
##                                                              
##               hepatic_failure_acute_on_chronic 15    0    0.0
##                                                              
##                                                              
##                          diabetic_ketoacidosis 15    0    0.0
##                                                              
##                                                              
##                                 acute_leukemia 15    0    0.0
##                                                              
##                   cerebral_vascular_accident_1 15    0    0.0
##                                                              
##                                                              
##       acute_myocardial_infarction_nstemi_stemi 15    0    0.0
##                                                              
##                    diffuse_alveolar_hemorrhage 15    0    0.0
##                                                              
##   decompensated_heart_failure_pulmonary_oedema 15    0    0.0
##                                                              
##                                                              
##                               pleural_effusion 15    0    0.0
##                                                              
##                                                              
##         interstitial_lung_disease_exacerbation 15    0    0.0
##                                                              
##                                                              
##                           organizing_pneumonia 15    0    0.0
##                                                              
##                  acute_eosinophilic_pneumoniae 15    0    0.0
##                                                              
##                                          other 15    0    0.0
##                                                              
##                                                              
##                                     angioedema 15    0    0.0
##                                                              
##                                                              
##                            acute_renal_failure 15    0    0.0
##                                                              
##                                                              
##                          altered_mental_status 15    0    0.0
##                                                              
##                                                              
##                           hypertensive_urgency 15    0    0.0
##                                                              
##                                                              
##                         hypertensive_emergency 15    0    0.0
##                                                              
##                                                              
##                                   endocarditis 15    0    0.0
##                                                              
##                                                              
##                                     bacteremia 15    0    0.0
##                                                              
##                                                              
##                      gastrointestinal_bleeding 15    0    0.0
##                                                              
##                              hemorrhagic_shock 15    0    0.0
##                                                              
##                                     aspiration 15    0    0.0
##                                                              
##                                                              
##  central_line_associated_blood_steam_infection 15    0    0.0
##                                                              
##                     prosthetic_joint_infection 15    0    0.0
##                                                              
##                  new_onset_atrial_fibrillation 15    0    0.0
##                                                              
##               newly_diagnosed_solid_malignancy 15    0    0.0
##                                                              
##                                                              
##                             myocardial_infract 15    0    0.0
##                                                              
##                                                              
##                       congestive_heart_failure 15    0    0.0
##                                                              
##                                                              
##                peripheral_vascular_disease_cci 15    0    0.0
##                                                              
##                                                              
##                        cerebrovascular_disease 15    0    0.0
##                                                              
##                                                              
##                                       dementia 15    0    0.0
##                                                              
##                      chronic_pulmonary_disease 15    0    0.0
##                                                              
##                                                              
##                    connective_tissue_disease_1 15    0    0.0
##                                                              
##                                                              
##                                  ulcer_disease 15    0    0.0
##                                                              
##                                                              
##                             mild_liver_disease 15    0    0.0
##                                                              
##                                                              
##                 diabetes_without_complications 15    0    0.0
##                                                              
##                                                              
##                 diabetes_with_end_organ_damage 15    0    0.0
##                                                              
##                                                              
##                                     hemiplegia 15    0    0.0
##                                                              
##                                                              
##               moderate_or_severe_renal_disease 15    0    0.0
##                                                              
##                                                              
##                     solid_tumor_non_metastatic 15    0    0.0
##                                                              
##                                                              
##                                       leukemia 15    0    0.0
##                                                              
##                                                              
##                                        lymhoma 15    0    0.0
##                                                              
##                                                              
##               moderate_or_severe_liver_disease 15    0    0.0
##                                                              
##                                                              
##                         metastatic_solid_tumor 15    0    0.0
##                                                              
##                                                              
##                                           aids 15    0    0.0
##                                                              
##                                                              
##                                    penicillins 15    0    0.0
##                                                              
##                                                              
##                                 cephalosporins 15    0    0.0
##                                                              
##                                                              
##                                    carbapenems 15    0    0.0
##                                                              
##                                                              
##                                     vancomycin 15    0    0.0
##                                                              
##                                                              
##                                  metronidazole 15    0    0.0
##                                                              
##                                                              
##                                     macrolides 15    0    0.0
##                                                              
##                                                              
##                                     quinolones 15    0    0.0
##                                                              
##                                                              
##                                        other_2 15    0    0.0
##                                                              
##                                                              
##                                    clindamycin 15    0    0.0
##                                                              
##                                                              
##                                aminoglycosides 15    0    0.0
##                                                              
##                                                              
##                                    doxycycline 15    0    0.0
##                                                              
##                                                              
##                  trimethoprim_sulfamethoxazole 15    0    0.0
##                                                              
##                                                              
##                                      rifaximin 15    0    0.0
##                                                              
##                                                              
##                                           diet 15    0    0.0
##                                                              
##                                                              
##                                   level freq percent cum.percent
##                                  Female    7    46.7        46.7
##                                    Male    8    53.3       100.0
##                                                                 
##                        African American    6    40.0        40.0
##                      More than one race    1     6.7        46.7
##                                   Other    1     6.7        53.3
##                         White, Hispanic    0     0.0        53.3
##                     White, non-Hispanic    7    46.7       100.0
##                                                                 
##                                Survivor    0     0.0         0.0
##                            Non-Survivor   15   100.0       100.0
##                                                                 
##        Acute (on chronic) liver failure    0     0.0         0.0
##                           CNS pathology    1     6.7         6.7
##                           GI hemorrhage    0     0.0         6.7
##              Post-operative observation    0     0.0         6.7
##               Respiratory failure, AHRF    6    40.0        46.7
##  Respiratory failure, airway compromise    0     0.0        46.7
##        Respiratory failure, ventilatory    0     0.0        46.7
##               Sepsis (+/- septic shock)    8    53.3       100.0
##                                                                 
##                                      No   10    66.7        66.7
##                                     Yes    5    33.3       100.0
##                                                                 
##                                    None    1     6.7         6.7
##                                  Sepsis   14    93.3       100.0
##                                                                 
##                              Cardiology    1     6.7         6.7
##                                      ED    4    26.7        33.3
##                        General Medicine    2    13.3        46.7
##                                   Liver    1     6.7        53.3
##                                Oncology    2    13.3        66.7
##                                     OSH    5    33.3       100.0
##                                 Surgery    0     0.0       100.0
##                                                                 
##                                      No   15   100.0       100.0
##                                                                 
##                               Unchecked   10    66.7        66.7
##                                 Checked    5    33.3       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   11    73.3        73.3
##                                 Checked    4    26.7       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   11    73.3        73.3
##                                 Checked    4    26.7       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   13    86.7        86.7
##                                 Checked    2    13.3       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   12    80.0        80.0
##                                 Checked    3    20.0       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked    6    40.0        40.0
##                                 Checked    9    60.0       100.0
##                                                                 
##                               Unchecked   11    73.3        73.3
##                                 Checked    4    26.7       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   12    80.0        80.0
##                                 Checked    3    20.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   10    66.7        66.7
##                                 Checked    5    33.3       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   12    80.0        80.0
##                                 Checked    3    20.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                                                 
##                               Unchecked   11    73.3        73.3
##                                 Checked    4    26.7       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   11    73.3        73.3
##                                 Checked    4    26.7       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   12    80.0        80.0
##                                 Checked    3    20.0       100.0
##                                                                 
##                               Unchecked   13    86.7        86.7
##                                 Checked    2    13.3       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   12    80.0        80.0
##                                 Checked    3    20.0       100.0
##                                                                 
##                               Unchecked   12    80.0        80.0
##                                 Checked    3    20.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked    5    33.3        33.3
##                                 Checked   10    66.7       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked    7    46.7        46.7
##                                 Checked    8    53.3       100.0
##                                                                 
##                               Unchecked   12    80.0        80.0
##                                 Checked    3    20.0       100.0
##                                                                 
##                               Unchecked   13    86.7        86.7
##                                 Checked    2    13.3       100.0
##                                                                 
##                               Unchecked   15   100.0       100.0
##                                 Checked    0     0.0       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   13    86.7        86.7
##                                 Checked    2    13.3       100.0
##                                                                 
##                               Unchecked   13    86.7        86.7
##                                 Checked    2    13.3       100.0
##                                                                 
##                               Unchecked   14    93.3        93.3
##                                 Checked    1     6.7       100.0
##                                                                 
##                               Unchecked   12    80.0        80.0
##                                 Checked    3    20.0       100.0
##                                                                 
##                                    diet   10    66.7        66.7
##                                     npo    5    33.3       100.0
##                                                                 
## 
## p-values
##                                                    pApprox       pExact
## sex_factor                                    6.334148e-01 5.377460e-01
## race_factor                                   1.665324e-01 9.477216e-02
## thirtyday_mortality_overall                   2.669719e-11 6.346866e-13
## primary_dx_factor                             2.002682e-01 2.761585e-01
## ards_factor                                   1.624799e-01 1.097625e-01
## sepsis_factor                                 2.298595e-01 1.450870e-01
## admit_from_factor                             8.101341e-01 8.168277e-01
## covid_upon_admission                                    NA           NA
## bacterial_pneumonia                           8.831088e-01 7.349275e-01
## fungal_pneumonia                                        NA           NA
## viral_pneumonia                                         NA           NA
## chronic_obstructive_pulmonary_disease_copd_1  1.000000e+00 5.229592e-01
## asthma_exacerbation                                     NA           NA
## lung_lobar_collapse                           8.604215e-01 1.000000e+00
## pulmonary_embolism                            1.000000e+00 1.000000e+00
## hemoptysis                                              NA           NA
## pancreatitis                                            NA           NA
## infection_genitourinary_system                1.000000e+00 1.000000e+00
## infection_intra_abdominal                     9.749930e-01 1.000000e+00
## infection_soft_tissue                         1.157759e-01 6.242016e-02
## infection_cns                                 1.000000e+00 5.229592e-01
## hepatic_failure_acute_fullminant                        NA           NA
## hepatic_failure_acute_on_chronic              4.520506e-01 2.184650e-01
## diabetic_ketoacidosis                         1.000000e+00 1.000000e+00
## acute_leukemia                                          NA           NA
## cerebral_vascular_accident_1                  1.000000e+00 1.000000e+00
## acute_myocardial_infarction_nstemi_stemi                NA           NA
## diffuse_alveolar_hemorrhage                             NA           NA
## decompensated_heart_failure_pulmonary_oedema  1.000000e+00 1.000000e+00
## pleural_effusion                              6.708174e-01 3.061224e-01
## interstitial_lung_disease_exacerbation        1.000000e+00 1.000000e+00
## organizing_pneumonia                                    NA           NA
## acute_eosinophilic_pneumoniae                           NA           NA
## other                                         1.000000e+00 1.000000e+00
## angioedema                                    1.000000e+00 1.000000e+00
## acute_renal_failure                           1.000000e+00 1.000000e+00
## altered_mental_status                         7.357720e-01 4.701605e-01
## hypertensive_urgency                          1.000000e+00 1.000000e+00
## hypertensive_emergency                        1.000000e+00 5.229592e-01
## endocarditis                                  6.708174e-01 3.061224e-01
## bacteremia                                    1.000000e+00 1.000000e+00
## gastrointestinal_bleeding                               NA           NA
## hemorrhagic_shock                                       NA           NA
## aspiration                                    3.208816e-01 1.597727e-01
## central_line_associated_blood_steam_infection           NA           NA
## prosthetic_joint_infection                              NA           NA
## new_onset_atrial_fibrillation                           NA           NA
## newly_diagnosed_solid_malignancy              1.000000e+00 1.000000e+00
## myocardial_infract                            6.708174e-01 3.061224e-01
## congestive_heart_failure                      7.148369e-01 5.001490e-01
## peripheral_vascular_disease_cci               8.604215e-01 1.000000e+00
## cerebrovascular_disease                       3.208816e-01 1.597727e-01
## dementia                                                NA           NA
## chronic_pulmonary_disease                     6.465984e-01 5.263120e-01
## connective_tissue_disease_1                   7.501738e-01 6.517775e-01
## ulcer_disease                                 1.000000e+00 1.000000e+00
## mild_liver_disease                            1.000000e+00 5.229592e-01
## diabetes_without_complications                1.021583e-01 8.683169e-02
## diabetes_with_end_organ_damage                2.293030e-01 1.793825e-01
## hemiplegia                                    8.604215e-01 1.000000e+00
## moderate_or_severe_renal_disease              1.000000e+00 1.000000e+00
## solid_tumor_non_metastatic                    1.000000e+00 1.000000e+00
## leukemia                                      1.000000e+00 5.229592e-01
## lymhoma                                       6.708174e-01 3.061224e-01
## moderate_or_severe_liver_disease              7.517316e-01 6.598757e-01
## metastatic_solid_tumor                        1.487488e-01 7.945685e-02
## aids                                          8.604215e-01 1.000000e+00
## penicillins                                   2.912692e-01 3.056945e-01
## cephalosporins                                6.946493e-01 5.424489e-01
## carbapenems                                   1.000000e+00 5.229592e-01
## vancomycin                                    7.754933e-01 7.569133e-01
## metronidazole                                 1.000000e+00 1.000000e+00
## macrolides                                    6.660091e-01 7.021443e-01
## quinolones                                    1.000000e+00 1.000000e+00
## other_2                                       1.000000e+00 1.000000e+00
## clindamycin                                   6.708174e-01 3.061224e-01
## aminoglycosides                               7.551192e-01 5.763560e-01
## doxycycline                                   7.551192e-01 5.763560e-01
## trimethoprim_sulfamethoxazole                 7.501738e-01 6.517775e-01
## rifaximin                                     3.208816e-01 1.597727e-01
## diet                                          1.000000e+00 1.000000e+00
## 
## Standardize mean differences
##                                                   1 vs 2
## sex_factor                                    0.24532965
## race_factor                                   0.79246603
## thirtyday_mortality_overall                          NaN
## primary_dx_factor                             1.19514111
## ards_factor                                   0.53420722
## sepsis_factor                                 0.55261019
## admit_from_factor                             0.59284606
## covid_upon_admission                          0.00000000
## bacterial_pneumonia                           0.15032054
## fungal_pneumonia                              0.00000000
## viral_pneumonia                               0.00000000
## chronic_obstructive_pulmonary_disease_copd_1  0.17487584
## asthma_exacerbation                           0.00000000
## lung_lobar_collapse                           0.35355339
## pulmonary_embolism                            0.24618298
## hemoptysis                                    0.00000000
## pancreatitis                                  0.00000000
## infection_genitourinary_system                0.07240486
## infection_intra_abdominal                     0.17694036
## infection_soft_tissue                         0.58679178
## infection_cns                                 0.17487584
## hepatic_failure_acute_fullminant              0.00000000
## hepatic_failure_acute_on_chronic              0.38715537
## diabetic_ketoacidosis                         0.24618298
## acute_leukemia                                0.00000000
## cerebral_vascular_accident_1                  0.24618298
## acute_myocardial_infarction_nstemi_stemi      0.00000000
## diffuse_alveolar_hemorrhage                   0.00000000
## decompensated_heart_failure_pulmonary_oedema  0.06022021
## pleural_effusion                              0.37796447
## interstitial_lung_disease_exacerbation        0.24618298
## organizing_pneumonia                          0.00000000
## acute_eosinophilic_pneumoniae                 0.00000000
## other                                         0.03234654
## angioedema                                    0.24618298
## acute_renal_failure                           0.08348432
## altered_mental_status                         0.21847370
## hypertensive_urgency                          0.24618298
## hypertensive_emergency                        0.17487584
## endocarditis                                  0.37796447
## bacteremia                                    0.03234654
## gastrointestinal_bleeding                     0.00000000
## hemorrhagic_shock                             0.00000000
## aspiration                                    0.43022084
## central_line_associated_blood_steam_infection 0.00000000
## prosthetic_joint_infection                    0.00000000
## new_onset_atrial_fibrillation                 0.00000000
## newly_diagnosed_solid_malignancy              0.24618298
## myocardial_infract                            0.37796447
## congestive_heart_failure                      0.21863473
## peripheral_vascular_disease_cci               0.35355339
## cerebrovascular_disease                       0.43022084
## dementia                                      0.00000000
## chronic_pulmonary_disease                     0.24899946
## connective_tissue_disease_1                   0.26245140
## ulcer_disease                                 0.24618298
## mild_liver_disease                            0.17487584
## diabetes_without_complications                0.78446454
## diabetes_with_end_organ_damage                0.48031643
## hemiplegia                                    0.35355339
## moderate_or_severe_renal_disease              0.06022021
## solid_tumor_non_metastatic                    0.04736465
## leukemia                                      0.17487584
## lymhoma                                       0.37796447
## moderate_or_severe_liver_disease              0.22675224
## metastatic_solid_tumor                        0.55559022
## aids                                          0.35355339
## penicillins                                   0.58722022
## cephalosporins                                0.22275697
## carbapenems                                   0.17487584
## vancomycin                                    0.18516210
## metronidazole                                 0.08560952
## macrolides                                    0.26526470
## quinolones                                    0.24618298
## other_2                                       0.03234654
## clindamycin                                   0.37796447
## aminoglycosides                               0.25487862
## doxycycline                                   0.25487862
## trimethoprim_sulfamethoxazole                 0.26245140
## rifaximin                                     0.43022084
## diet                                          0.08459099
# Print tableone
tableone_nocovid_print_vc <-
  print(
    tableone_nocovid_vc,
    nonnormal = TRUE,
    formatOptions = list(big.mark = ",")
  )
##                                                                Stratified by thirtyday_mortality_overall
##                                                                 Survivor            
##   n                                                                34               
##   age (median [IQR])                                            63.50 [53.50, 71.00]
##   sex_factor = Male (%)                                            14 ( 41.2)       
##   bmi (median [IQR])                                            28.31 [23.63, 36.07]
##   race_factor (%)                                                                   
##      African American                                              24 ( 70.6)       
##      More than one race                                             1 (  2.9)       
##      Other                                                          0 (  0.0)       
##      White, Hispanic                                                1 (  2.9)       
##      White, non-Hispanic                                            8 ( 23.5)       
##   cci_total_sc (median [IQR])                                    5.00 [3.00, 6.75]  
##   thirtyday_mortality_overall = Non-Survivor (%)                    0 (  0.0)       
##   primary_dx_factor (%)                                                             
##      Acute (on chronic) liver failure                               3 (  8.8)       
##      CNS pathology                                                  0 (  0.0)       
##      GI hemorrhage                                                  2 (  5.9)       
##      Post-operative observation                                     1 (  2.9)       
##      Respiratory failure, AHRF                                     11 ( 32.4)       
##      Respiratory failure, airway compromise                         3 (  8.8)       
##      Respiratory failure, ventilatory                               4 ( 11.8)       
##      Sepsis (+/- septic shock)                                     10 ( 29.4)       
##   ards_factor = Yes (%)                                             4 ( 11.8)       
##   sepsis_factor = Sepsis (%)                                       25 ( 73.5)       
##   admit_from_factor (%)                                                             
##      Cardiology                                                     2 (  5.9)       
##      ED                                                            13 ( 38.2)       
##      General Medicine                                               4 ( 11.8)       
##      Liver                                                          1 (  2.9)       
##      Oncology                                                       2 (  5.9)       
##      OSH                                                            9 ( 26.5)       
##      Surgery                                                        3 (  8.8)       
##   covid_upon_admission = No (%)                                    34 (100.0)       
##   sofa_score_total (median [IQR])                                7.50 [4.25, 10.00] 
##   ap2_total_score (median [IQR])                                22.50 [16.25, 28.75]
##   day_collected (median [IQR])                                   3.00 [1.00, 4.75]  
##   bacterial_pneumonia = Checked (%)                                 9 ( 26.5)       
##   fungal_pneumonia = Unchecked (%)                                 34 (100.0)       
##   viral_pneumonia = Unchecked (%)                                  34 (100.0)       
##   chronic_obstructive_pulmonary_disease_copd_1 = Checked (%)        1 (  2.9)       
##   asthma_exacerbation = Unchecked (%)                              34 (100.0)       
##   lung_lobar_collapse = Checked (%)                                 2 (  5.9)       
##   pulmonary_embolism = Checked (%)                                  1 (  2.9)       
##   hemoptysis = Unchecked (%)                                       34 (100.0)       
##   pancreatitis = Unchecked (%)                                     34 (100.0)       
##   infection_genitourinary_system = Checked (%)                      8 ( 23.5)       
##   infection_intra_abdominal = Checked (%)                           4 ( 11.8)       
##   infection_soft_tissue = Checked (%)                               2 (  5.9)       
##   infection_cns = Checked (%)                                       1 (  2.9)       
##   hepatic_failure_acute_fullminant = Unchecked (%)                 34 (100.0)       
##   hepatic_failure_acute_on_chronic = Checked (%)                    1 (  2.9)       
##   diabetic_ketoacidosis = Checked (%)                               1 (  2.9)       
##   acute_leukemia = Unchecked (%)                                   34 (100.0)       
##   cerebral_vascular_accident_1 = Checked (%)                        1 (  2.9)       
##   acute_myocardial_infarction_nstemi_stemi = Unchecked (%)         34 (100.0)       
##   diffuse_alveolar_hemorrhage = Unchecked (%)                      34 (100.0)       
##   decompensated_heart_failure_pulmonary_oedema = Checked (%)        6 ( 17.6)       
##   pleural_effusion = Checked (%)                                    0 (  0.0)       
##   interstitial_lung_disease_exacerbation = Checked (%)              1 (  2.9)       
##   organizing_pneumonia = Unchecked (%)                             34 (100.0)       
##   acute_eosinophilic_pneumoniae = Unchecked (%)                    34 (100.0)       
##   other = Checked (%)                                               2 (  5.9)       
##   angioedema = Checked (%)                                          1 (  2.9)       
##   acute_renal_failure = Checked (%)                                19 ( 55.9)       
##   altered_mental_status = Checked (%)                               6 ( 17.6)       
##   hypertensive_urgency = Checked (%)                                1 (  2.9)       
##   hypertensive_emergency = Checked (%)                              1 (  2.9)       
##   endocarditis = Checked (%)                                        0 (  0.0)       
##   bacteremia = Checked (%)                                          2 (  5.9)       
##   gastrointestinal_bleeding = Unchecked (%)                        34 (100.0)       
##   hemorrhagic_shock = Unchecked (%)                                34 (100.0)       
##   aspiration = Checked (%)                                          2 (  5.9)       
##   central_line_associated_blood_steam_infection = Unchecked (%)    34 (100.0)       
##   prosthetic_joint_infection = Unchecked (%)                       34 (100.0)       
##   new_onset_atrial_fibrillation = Unchecked (%)                    34 (100.0)       
##   newly_diagnosed_solid_malignancy = Checked (%)                    1 (  2.9)       
##   myocardial_infract = Checked (%)                                  0 (  0.0)       
##   congestive_heart_failure = Checked (%)                            8 ( 23.5)       
##   peripheral_vascular_disease_cci = Checked (%)                     2 (  5.9)       
##   cerebrovascular_disease = Checked (%)                             2 (  5.9)       
##   dementia = Unchecked (%)                                         34 (100.0)       
##   chronic_pulmonary_disease = Checked (%)                          13 ( 38.2)       
##   connective_tissue_disease_1 = Checked (%)                         5 ( 14.7)       
##   ulcer_disease = Checked (%)                                       1 (  2.9)       
##   mild_liver_disease = Checked (%)                                  1 (  2.9)       
##   diabetes_without_complications = Checked (%)                      8 ( 23.5)       
##   diabetes_with_end_organ_damage = Checked (%)                      3 (  8.8)       
##   hemiplegia = Checked (%)                                          2 (  5.9)       
##   moderate_or_severe_renal_disease = Checked (%)                    6 ( 17.6)       
##   solid_tumor_non_metastatic = Checked (%)                          4 ( 11.8)       
##   leukemia = Checked (%)                                            1 (  2.9)       
##   lymhoma = Checked (%)                                             0 (  0.0)       
##   moderate_or_severe_liver_disease = Checked (%)                    4 ( 11.8)       
##   metastatic_solid_tumor = Checked (%)                              1 (  2.9)       
##   aids = Checked (%)                                                2 (  5.9)       
##   penicillins = Checked (%)                                         5 ( 14.7)       
##   cephalosporins = Checked (%)                                     19 ( 55.9)       
##   carbapenems = Checked (%)                                         1 (  2.9)       
##   vancomycin = Checked (%)                                         15 ( 44.1)       
##   metronidazole = Checked (%)                                       8 ( 23.5)       
##   macrolides = Checked (%)                                          8 ( 23.5)       
##   quinolones = Checked (%)                                          1 (  2.9)       
##   other_2 = Checked (%)                                             2 (  5.9)       
##   clindamycin = Checked (%)                                         0 (  0.0)       
##   aminoglycosides = Checked (%)                                     2 (  5.9)       
##   doxycycline = Checked (%)                                         2 (  5.9)       
##   trimethoprim_sulfamethoxazole = Checked (%)                       5 ( 14.7)       
##   rifaximin = Checked (%)                                           2 (  5.9)       
##   diet = npo (%)                                                   10 ( 29.4)       
##   d_sofa_admission (median [IQR])                                1.00 [-1.00, 2.75] 
##   d_sofa_stool (median [IQR])                                    0.00 [0.00, 1.00]  
##                                                                Stratified by thirtyday_mortality_overall
##                                                                 Non-Survivor        
##   n                                                                15               
##   age (median [IQR])                                            62.00 [57.50, 72.00]
##   sex_factor = Male (%)                                             8 ( 53.3)       
##   bmi (median [IQR])                                            32.28 [25.56, 37.39]
##   race_factor (%)                                                                   
##      African American                                               6 ( 40.0)       
##      More than one race                                             1 (  6.7)       
##      Other                                                          1 (  6.7)       
##      White, Hispanic                                                0 (  0.0)       
##      White, non-Hispanic                                            7 ( 46.7)       
##   cci_total_sc (median [IQR])                                    6.00 [5.00, 7.50]  
##   thirtyday_mortality_overall = Non-Survivor (%)                   15 (100.0)       
##   primary_dx_factor (%)                                                             
##      Acute (on chronic) liver failure                               0 (  0.0)       
##      CNS pathology                                                  1 (  6.7)       
##      GI hemorrhage                                                  0 (  0.0)       
##      Post-operative observation                                     0 (  0.0)       
##      Respiratory failure, AHRF                                      6 ( 40.0)       
##      Respiratory failure, airway compromise                         0 (  0.0)       
##      Respiratory failure, ventilatory                               0 (  0.0)       
##      Sepsis (+/- septic shock)                                      8 ( 53.3)       
##   ards_factor = Yes (%)                                             5 ( 33.3)       
##   sepsis_factor = Sepsis (%)                                       14 ( 93.3)       
##   admit_from_factor (%)                                                             
##      Cardiology                                                     1 (  6.7)       
##      ED                                                             4 ( 26.7)       
##      General Medicine                                               2 ( 13.3)       
##      Liver                                                          1 (  6.7)       
##      Oncology                                                       2 ( 13.3)       
##      OSH                                                            5 ( 33.3)       
##      Surgery                                                        0 (  0.0)       
##   covid_upon_admission = No (%)                                    15 (100.0)       
##   sofa_score_total (median [IQR])                               11.00 [8.00, 14.00] 
##   ap2_total_score (median [IQR])                                29.00 [24.00, 30.00]
##   day_collected (median [IQR])                                   1.00 [1.00, 3.00]  
##   bacterial_pneumonia = Checked (%)                                 5 ( 33.3)       
##   fungal_pneumonia = Unchecked (%)                                 15 (100.0)       
##   viral_pneumonia = Unchecked (%)                                  15 (100.0)       
##   chronic_obstructive_pulmonary_disease_copd_1 = Checked (%)        1 (  6.7)       
##   asthma_exacerbation = Unchecked (%)                              15 (100.0)       
##   lung_lobar_collapse = Checked (%)                                 0 (  0.0)       
##   pulmonary_embolism = Checked (%)                                  0 (  0.0)       
##   hemoptysis = Unchecked (%)                                       15 (100.0)       
##   pancreatitis = Unchecked (%)                                     15 (100.0)       
##   infection_genitourinary_system = Checked (%)                      4 ( 26.7)       
##   infection_intra_abdominal = Checked (%)                           1 (  6.7)       
##   infection_soft_tissue = Checked (%)                               4 ( 26.7)       
##   infection_cns = Checked (%)                                       1 (  6.7)       
##   hepatic_failure_acute_fullminant = Unchecked (%)                 15 (100.0)       
##   hepatic_failure_acute_on_chronic = Checked (%)                    2 ( 13.3)       
##   diabetic_ketoacidosis = Checked (%)                               0 (  0.0)       
##   acute_leukemia = Unchecked (%)                                   15 (100.0)       
##   cerebral_vascular_accident_1 = Checked (%)                        0 (  0.0)       
##   acute_myocardial_infarction_nstemi_stemi = Unchecked (%)         15 (100.0)       
##   diffuse_alveolar_hemorrhage = Unchecked (%)                      15 (100.0)       
##   decompensated_heart_failure_pulmonary_oedema = Checked (%)        3 ( 20.0)       
##   pleural_effusion = Checked (%)                                    1 (  6.7)       
##   interstitial_lung_disease_exacerbation = Checked (%)              0 (  0.0)       
##   organizing_pneumonia = Unchecked (%)                             15 (100.0)       
##   acute_eosinophilic_pneumoniae = Unchecked (%)                    15 (100.0)       
##   other = Checked (%)                                               1 (  6.7)       
##   angioedema = Checked (%)                                          0 (  0.0)       
##   acute_renal_failure = Checked (%)                                 9 ( 60.0)       
##   altered_mental_status = Checked (%)                               4 ( 26.7)       
##   hypertensive_urgency = Checked (%)                                0 (  0.0)       
##   hypertensive_emergency = Checked (%)                              1 (  6.7)       
##   endocarditis = Checked (%)                                        1 (  6.7)       
##   bacteremia = Checked (%)                                          1 (  6.7)       
##   gastrointestinal_bleeding = Unchecked (%)                        15 (100.0)       
##   hemorrhagic_shock = Unchecked (%)                                15 (100.0)       
##   aspiration = Checked (%)                                          3 ( 20.0)       
##   central_line_associated_blood_steam_infection = Unchecked (%)    15 (100.0)       
##   prosthetic_joint_infection = Unchecked (%)                       15 (100.0)       
##   new_onset_atrial_fibrillation = Unchecked (%)                    15 (100.0)       
##   newly_diagnosed_solid_malignancy = Checked (%)                    0 (  0.0)       
##   myocardial_infract = Checked (%)                                  1 (  6.7)       
##   congestive_heart_failure = Checked (%)                            5 ( 33.3)       
##   peripheral_vascular_disease_cci = Checked (%)                     0 (  0.0)       
##   cerebrovascular_disease = Checked (%)                             3 ( 20.0)       
##   dementia = Unchecked (%)                                         15 (100.0)       
##   chronic_pulmonary_disease = Checked (%)                           4 ( 26.7)       
##   connective_tissue_disease_1 = Checked (%)                         1 (  6.7)       
##   ulcer_disease = Checked (%)                                       0 (  0.0)       
##   mild_liver_disease = Checked (%)                                  1 (  6.7)       
##   diabetes_without_complications = Checked (%)                      0 (  0.0)       
##   diabetes_with_end_organ_damage = Checked (%)                      4 ( 26.7)       
##   hemiplegia = Checked (%)                                          0 (  0.0)       
##   moderate_or_severe_renal_disease = Checked (%)                    3 ( 20.0)       
##   solid_tumor_non_metastatic = Checked (%)                          2 ( 13.3)       
##   leukemia = Checked (%)                                            1 (  6.7)       
##   lymhoma = Checked (%)                                             1 (  6.7)       
##   moderate_or_severe_liver_disease = Checked (%)                    3 ( 20.0)       
##   metastatic_solid_tumor = Checked (%)                              3 ( 20.0)       
##   aids = Checked (%)                                                0 (  0.0)       
##   penicillins = Checked (%)                                         0 (  0.0)       
##   cephalosporins = Checked (%)                                     10 ( 66.7)       
##   carbapenems = Checked (%)                                         1 (  6.7)       
##   vancomycin = Checked (%)                                          8 ( 53.3)       
##   metronidazole = Checked (%)                                       3 ( 20.0)       
##   macrolides = Checked (%)                                          2 ( 13.3)       
##   quinolones = Checked (%)                                          0 (  0.0)       
##   other_2 = Checked (%)                                             1 (  6.7)       
##   clindamycin = Checked (%)                                         1 (  6.7)       
##   aminoglycosides = Checked (%)                                     2 ( 13.3)       
##   doxycycline = Checked (%)                                         2 ( 13.3)       
##   trimethoprim_sulfamethoxazole = Checked (%)                       1 (  6.7)       
##   rifaximin = Checked (%)                                           3 ( 20.0)       
##   diet = npo (%)                                                    5 ( 33.3)       
##   d_sofa_admission (median [IQR])                               -1.00 [-1.50, 2.00] 
##   d_sofa_stool (median [IQR])                                    0.00 [-2.75, 0.00] 
##                                                                Stratified by thirtyday_mortality_overall
##                                                                 p      test   
##   n                                                                           
##   age (median [IQR])                                             0.641 nonnorm
##   sex_factor = Male (%)                                          0.633        
##   bmi (median [IQR])                                             0.529 nonnorm
##   race_factor (%)                                                0.167        
##      African American                                                         
##      More than one race                                                       
##      Other                                                                    
##      White, Hispanic                                                          
##      White, non-Hispanic                                                      
##   cci_total_sc (median [IQR])                                    0.092 nonnorm
##   thirtyday_mortality_overall = Non-Survivor (%)                <0.001        
##   primary_dx_factor (%)                                          0.200        
##      Acute (on chronic) liver failure                                         
##      CNS pathology                                                            
##      GI hemorrhage                                                            
##      Post-operative observation                                               
##      Respiratory failure, AHRF                                                
##      Respiratory failure, airway compromise                                   
##      Respiratory failure, ventilatory                                         
##      Sepsis (+/- septic shock)                                                
##   ards_factor = Yes (%)                                          0.162        
##   sepsis_factor = Sepsis (%)                                     0.230        
##   admit_from_factor (%)                                          0.810        
##      Cardiology                                                               
##      ED                                                                       
##      General Medicine                                                         
##      Liver                                                                    
##      Oncology                                                                 
##      OSH                                                                      
##      Surgery                                                                  
##   covid_upon_admission = No (%)                                     NA        
##   sofa_score_total (median [IQR])                                0.012 nonnorm
##   ap2_total_score (median [IQR])                                 0.049 nonnorm
##   day_collected (median [IQR])                                   0.126 nonnorm
##   bacterial_pneumonia = Checked (%)                              0.883        
##   fungal_pneumonia = Unchecked (%)                                  NA        
##   viral_pneumonia = Unchecked (%)                                   NA        
##   chronic_obstructive_pulmonary_disease_copd_1 = Checked (%)     1.000        
##   asthma_exacerbation = Unchecked (%)                               NA        
##   lung_lobar_collapse = Checked (%)                              0.860        
##   pulmonary_embolism = Checked (%)                               1.000        
##   hemoptysis = Unchecked (%)                                        NA        
##   pancreatitis = Unchecked (%)                                      NA        
##   infection_genitourinary_system = Checked (%)                   1.000        
##   infection_intra_abdominal = Checked (%)                        0.975        
##   infection_soft_tissue = Checked (%)                            0.116        
##   infection_cns = Checked (%)                                    1.000        
##   hepatic_failure_acute_fullminant = Unchecked (%)                  NA        
##   hepatic_failure_acute_on_chronic = Checked (%)                 0.452        
##   diabetic_ketoacidosis = Checked (%)                            1.000        
##   acute_leukemia = Unchecked (%)                                    NA        
##   cerebral_vascular_accident_1 = Checked (%)                     1.000        
##   acute_myocardial_infarction_nstemi_stemi = Unchecked (%)          NA        
##   diffuse_alveolar_hemorrhage = Unchecked (%)                       NA        
##   decompensated_heart_failure_pulmonary_oedema = Checked (%)     1.000        
##   pleural_effusion = Checked (%)                                 0.671        
##   interstitial_lung_disease_exacerbation = Checked (%)           1.000        
##   organizing_pneumonia = Unchecked (%)                              NA        
##   acute_eosinophilic_pneumoniae = Unchecked (%)                     NA        
##   other = Checked (%)                                            1.000        
##   angioedema = Checked (%)                                       1.000        
##   acute_renal_failure = Checked (%)                              1.000        
##   altered_mental_status = Checked (%)                            0.736        
##   hypertensive_urgency = Checked (%)                             1.000        
##   hypertensive_emergency = Checked (%)                           1.000        
##   endocarditis = Checked (%)                                     0.671        
##   bacteremia = Checked (%)                                       1.000        
##   gastrointestinal_bleeding = Unchecked (%)                         NA        
##   hemorrhagic_shock = Unchecked (%)                                 NA        
##   aspiration = Checked (%)                                       0.321        
##   central_line_associated_blood_steam_infection = Unchecked (%)     NA        
##   prosthetic_joint_infection = Unchecked (%)                        NA        
##   new_onset_atrial_fibrillation = Unchecked (%)                     NA        
##   newly_diagnosed_solid_malignancy = Checked (%)                 1.000        
##   myocardial_infract = Checked (%)                               0.671        
##   congestive_heart_failure = Checked (%)                         0.715        
##   peripheral_vascular_disease_cci = Checked (%)                  0.860        
##   cerebrovascular_disease = Checked (%)                          0.321        
##   dementia = Unchecked (%)                                          NA        
##   chronic_pulmonary_disease = Checked (%)                        0.647        
##   connective_tissue_disease_1 = Checked (%)                      0.750        
##   ulcer_disease = Checked (%)                                    1.000        
##   mild_liver_disease = Checked (%)                               1.000        
##   diabetes_without_complications = Checked (%)                   0.102        
##   diabetes_with_end_organ_damage = Checked (%)                   0.229        
##   hemiplegia = Checked (%)                                       0.860        
##   moderate_or_severe_renal_disease = Checked (%)                 1.000        
##   solid_tumor_non_metastatic = Checked (%)                       1.000        
##   leukemia = Checked (%)                                         1.000        
##   lymhoma = Checked (%)                                          0.671        
##   moderate_or_severe_liver_disease = Checked (%)                 0.752        
##   metastatic_solid_tumor = Checked (%)                           0.149        
##   aids = Checked (%)                                             0.860        
##   penicillins = Checked (%)                                      0.291        
##   cephalosporins = Checked (%)                                   0.695        
##   carbapenems = Checked (%)                                      1.000        
##   vancomycin = Checked (%)                                       0.775        
##   metronidazole = Checked (%)                                    1.000        
##   macrolides = Checked (%)                                       0.666        
##   quinolones = Checked (%)                                       1.000        
##   other_2 = Checked (%)                                          1.000        
##   clindamycin = Checked (%)                                      0.671        
##   aminoglycosides = Checked (%)                                  0.755        
##   doxycycline = Checked (%)                                      0.755        
##   trimethoprim_sulfamethoxazole = Checked (%)                    0.750        
##   rifaximin = Checked (%)                                        0.321        
##   diet = npo (%)                                                 1.000        
##   d_sofa_admission (median [IQR])                                0.331 nonnorm
##   d_sofa_stool (median [IQR])                                    0.075 nonnorm
# Save CSV
write.csv(
  tableone_nocovid_print_vc,
  "./Results/Table_One_30_Days_Mortality_validation.csv",
  row.names = TRUE
)

# Save table for paper
tableone_nocovid_print_vc_clean <-
tableone_nocovid_print_vc %>%
  as.data.frame() %>%
  rownames_to_column(var = "variable") %>% #distinct(variable)
  mutate(
    variable = dplyr::recode(
      variable,
            n = "Number of Patients",
      `age (median [IQR])` = "Age (median [IQR])",
      `sex_factor = Male (%)` = "Male (%)",
      `bmi (median [IQR])` = "Body Mass Index (median [IQR])",
      `race_factor (%)` = "Race (%)",
      `African American` = " African American",
      `Asian` = "Asian",
      `More than one race` = "More than one race",
      `White, Hispanic` = "White, Hispanic",
      `White, non-Hispanic` = "White, Non-Hispanic",
      `Other` = "Unknown Race",
      `cci_total_sc (median [IQR])` = "Charlson Comorbidity Index (median [IQR])",
      `primary_dx_factor (%)` = "Primary admission diagnosis (%)",
      `Acute (on chronic) liver failure` = "Acute chronic liver failure",
      # `X...AMI.dysrhythmia` = "AMI dysrhytmia",
      # `X...CHF.cardiogenic.shock` = "CHF cardiogenic shock",
      `CNS pathology` = "CNS pathology",
      `GI hemorrhage` = "GI hemorrhage",
      # `X...Metabolic` = "Metabolic",
      # `X...Other` = "Other Primary diagnosis",
      `Post-operative observation` = "Post-operative observation",
      `Respiratory failure, AHRF` = "Respiratory failure (AHRF)",
      `Respiratory failure, airway compromise` = "Respiratory failure, airway compromise",
      `Respiratory failure, ventilatory` = "Respiratory failure, ventilatory",
      `Sepsis (+/- septic shock)` = "Sepsis, septic shock",
      `ards_factor = Yes (%)` = "Acute respiratory distress syndrome (%)",
      `sepsis_factor = Sepsis (%)` = "Sepsis (%)",
      `admit_from_factor (%)` = "Admitted from (%)",
      `Cardiology` = "Cardiology",
      `ED` = "Emergency Department",
      `General Medicine` = "General Medicine",
      `Liver` = "Liver",
      # `Neurology` = "Nuerology",
      `Oncology` = "Oncology",
      `OSH` = "Outside Hospital",
      `Surgery` = "Surgery",
      # `X...NA.1` = "Unknown",
      `covid_upon_admission = No (%)` = "No Covid upon admission (%)",
      `sofa_score_total (median [IQR])` = "SOFA Score (median [IQR])",
      `ap2_total_score (median [IQR])` = "APACHE II Score (median [IQR])",
      `day_collected (median [IQR])` = "Day From Admission Stool Sample Collected (median [IQR])",
      `bacterial_pneumonia = Checked (%)` = "Bacterial Pneumonia (%)",
      `fungal_pneumonia = Checked (%)` = "Fungal Pneumonia (%)",
      `viral_pneumonia = Checked (%)` = "Viral Pneumonia (%)",
      `chronic_obstructive_pulmonary_disease_copd_1 = Checked (%)` = "Chronic Obstructive Pulmonary Disease (COPD) (%)",
      `asthma_exacerbation = Unchecked (%)` = "Asthma exacerbation (%)",
      `lung_lobar_collapse = Checked (%)` = "Lung/lobar collapse (%)",
      `pulmonary_embolism = Checked (%)` = "Pulmonary embolism (%)",
      `hemoptysis = Unchecked (%)` = "Hemoptysis (%)",
      `pancreatitis = Unchecked (%)` = "Pancreatitis (%)",
      `infection_genitourinary_system = Checked (%)` = "Infection, genitourinary system (%)",
      `infection_intra_abdominal = Checked (%)` = "Infection, Intra-abdominal (%)",
      `infection_soft_tissue = Checked (%)` = "Infection, soft tissue (%)",
      `infection_cns = Checked (%)` = "Infection, CNS (%)",
      `hepatic_failure_acute_fullminant = Unchecked (%)` = "Hepatic failure, acute fullminant (%)",
      `hepatic_failure_acute_on_chronic = Checked (%)` = "Hepatic failure, acute on chronic (%)",
      `diabetic_ketoacidosis = Checked (%)` = "Diabetic ketoacidosis (%)",
      `acute_leukemia = Unchecked (%)` = "Acute leukemia (%)",
      `cerebral_vascular_accident_1 = Checked (%)` = "Cerebreal vascular accident (%)",
      `acute_myocardial_infarction_nstemi_stemi = Unchecked (%)` = "Acute myocardial infarction (NSTEMI/STEMI) (%)",
      `diffuse_alveolar_hemorrhage = Unchecked (%)` = "Diffuse alveolar hemorrhage (%)",
      `decompensated_heart_failure_pulmonary_oedema = Checked (%)` = "Decompensated heart failure/Pulmonary oedema (%)",
      `pleural_effusion = Checked (%)` = "Pleural effusion (%)",
      `interstitial_lung_disease_exacerbation = Checked (%)` = "Interstitial lung disease exacerbation (%)",
      `organizing_pneumonia = Unchecked (%)` = "Organizing pneumonia (%)",
      `acute_eosinophilic_pneumoniae = Unchecked (%)` = "Acute eosinophilic pneumoniae (%)",
      `other = Checked (%)` = "Other (%)",
      `angioedema = Checked (%)` = "Angioedema (%)",
      `acute_renal_failure = Checked (%)` = "Acute renal failure (%)",
      `altered_mental_status = Checked (%)` = "Altered mental status (%)",
      `hypertensive_urgency = Checked (%)` = "Hypertensive urgency (%)",
      `hypertensive_emergency = Checked (%)` = "Hypertensive emergency (%)",
      `endocarditis = Checked (%)` = "Endocarditis (%)",
      `bacteremia = Checked (%)` = "Bacteremia (%)",
      `gastrointestinal_bleeding = Unchecked (%)` = "Gastrointestinal bleeding (%)",
      `hemorrhagic_shock = Unchecked (%)` = "Hemorrhagic shock (%)",
      `aspiration = Checked (%)` = "Aspiration (%)",
      `central_line_associated_blood_steam_infection = Unchecked (%)` = "Central line associated blood steam infection (%)",
      `prosthetic_joint_infection = Unchecked (%)` = "Prosthetic joint infection (%)",
      `new_onset_atrial_fibrillation = Unchecked (%)` = "New onset atrial fibrillation (%)",
      `newly_diagnosed_solid_malignancy = Checked (%)` = "Newly diagnosed solid malignancy (%)",
      `myocardial_infract = Checked (%)` = "Myocardial infract (%)",
      `congestive_heart_failure = Checked (%)` = "Congestive heart failure (%)",
      `peripheral_vascular_disease_cci = Checked (%)` = "Peripheral vascular disease (%)",
      `cerebrovascular_disease = Checked (%)` = "Cerebrovascular disease (%)",
      `dementia = Unchecked (%)` = "Dementia (%)",
      `chronic_pulmonary_disease = Checked (%)` = "Chronic pulmonary disease (%)",
      `connective_tissue_disease_1 = Checked (%)` = "Connective tissue disease (%)",
      `ulcer_disease = Checked (%)` = "Ulcer disease (%)",
      `mild_liver_disease = Checked (%)` = "Mild liver disease (%)",
      `diabetes_without_complications = Checked (%)` = "Diabetes (without complications) (%)",
      `diabetes_with_end_organ_damage = Checked (%)` = "Diabetes (with end organ damage) (%)",
      `hemiplegia = Checked (%)` = "Hemiplegia (%)",
      `moderate_or_severe_renal_disease = Checked (%)` = "Moderate or severe renal disease (%)",
      `solid_tumor_non_metastatic = Checked (%)` = "Solid tumor (non-metastatic) (%)",
      `leukemia = Checked (%)` = "Leukemia (%)",
      `lymhoma = Checked (%)` = "Lymphoma (%)",
      `moderate_or_severe_liver_disease = Checked (%)` = "Moderate or severe liver disease (%)",
      `metastatic_solid_tumor = Checked (%)` = "Solid tumor (metastatic) (%)",
      `aids = Checked (%)` = "AIDS (%)",
      `penicillins = Checked (%)` = "Penicillins (%)",
      `cephalosporins = Checked (%)` = "Cephalosporins (%)",
      `carbapenems = Checked (%)` = "Carbapenems (%)",
      `vancomycin = Checked (%)` = "Vancomycin (%)",
      `metronidazole = Checked (%)` = "Metronidazole (%)",
      `macrolides = Checked (%)` = "Macrolides (%)",
      `quinolones = Checked (%)` = "Quinolones (%)",
      `other_2 = Checked (%)` = "Other Antiobiotics (%)",
      `clindamycin = Checked (%)` = "Clindamycin (%)",
      `aminoglycosides = Checked (%)` = "Aminoglycosides (%)",
      `doxycycline = Checked (%)` = "Doxycycline (%)",
      `trimethoprim_sulfamethoxazole = Checked (%)` = "Trimethoprim-Sulfamethoxazole (%)",
      `rifaximin = Checked (%)` = "Rifaximin (%)",
      `diet...npo....` = "Diet (nothing by mouth) (%)",
      `d_sofa_admission..median..IQR..` = "SOFA from admission (median [IQR])",
      `d_sofa_stool..median..IQR..` = "SOFA from Stool Sample (median [IQR])"
    )
  ) %>% 
  column_to_rownames(var = "variable")

# Export to csv to then load in as a dataframe
write.csv(
  tableone_nocovid_print_vc_clean,
  "./Results/Table_One_30_Days_Mortality_validation_clean.csv",
  row.names = TRUE
)

# Import csv as dataframe
tableone_nocovid_csv_vc <-
  read.csv("./Results/Table_One_30_Days_Mortality_validation.csv",
    stringsAsFactors = FALSE
  )

# Filter for only p-values <= 0.3 to then include in multi-variable model
tableone_pval_filt_vc <- tableone_nocovid_csv_vc %>%
  dplyr::rename(variable = X) %>%
  mutate(
    p = ifelse(p == "<0.001", 0.001, p),
    p = as.numeric(p)
  ) %>%
  # dplyr::slice(2:5, 11, 22:23, 33:35, 95:107) %>%
  filter(!grepl(variable, pattern = "^\\s"))

tableone_pval_filt_vars_vc <- tableone_pval_filt_vc %>%
  filter(variable != "n") %>%
  select(variable) %>%
  mutate(
    variable = as.character(variable),
    variable = gsub(
      x = variable,
      pattern = "\\s\\(median \\[IQR\\]\\)|\\s\\(%\\)| = Yes| = [Cc]hecked| = [Uu]nchecked| = Male| = npo| = Sepsis| = None",
      fixed = FALSE,
      replacement = ""
    )
  ) %>%
  filter(variable %!in% c("thirtyday_mortality_overall = Non-Survivor", "covid_upon_admission = No")) %>%
  pull(variable)

tableone_nocovid_df_filt_vc <-
  tableone_nocovid_df_vc[, tableone_pval_filt_vars_vc]

tableone_nocovid_df_filt_vc <- tableone_nocovid_df_filt_vc %>%
  bind_cols(
    micu_new_nocovid_vc %>% ungroup() %>%
      left_join(cri_rxmar_abx_long_vc, by = "unique_id") %>%
      mutate(across(
        Cephalosporins:Quinolones, ~ replace_na(., "unchecked")
      )) %>%
      mutate(across(
        Cephalosporins:Quinolones, ~ as.factor(.)
      )) %>%
      select(unique_id, thirtyday_mortality_overall)
  ) %>%
  relocate(unique_id, .before = NULL) %>%
  mutate_all(as.character) %>% 
  pivot_longer(
    !c(unique_id:day_collected, thirtyday_mortality_overall),
    names_to = "variable",
    values_to = "value"
  ) %>%
  mutate(
    value = as.character(value),
    value = ifelse(value %in% c("Checked", "checked", "diet"), 1, 0)
  ) %>% # diet = 1, npo = 0
  pivot_wider(names_from = "variable", values_from = "value") %>% 
    mutate(age = as.numeric(age),
         bmi = as.numeric(bmi),
         cci_total_sc = as.numeric(cci_total_sc),
         sofa_score_total = as.numeric(sofa_score_total),
         ap2_total_score = as.numeric(ap2_total_score),
         day_collected = as.numeric(day_collected)) %>% 
  mutate_if(is.character, as.factor)


# Variables labels
cox_df_vc <- tableone_nocovid_df_filt_vc %>%
  left_join(micu_new_nocovid_vc %>% distinct(unique_id, metabolomicsID)) %>%
  labelled::remove_labels() %>%
  janitor::clean_names() %>%
  mutate(
    race_factor = as.character(race_factor),
    race_factor = ifelse(
      race_factor %in% c("Asian", "More than one race", "White, Hispanic"),
      "Other",
      race_factor
    )
  ) %>%
  dplyr::rename(metabolomicsID = metabolomics_id) %>%
  left_join(cutpoints_results_var_slct_vc %>% select(metabolomicsID, md_score)) %>%
  mutate(grouped_md_score = ifelse(
    md_score >= coordinates_mds$threshold,
    "High Score",
    "Low Score"
  )) %>%
  right_join(
    micu_new_nocovid_vc %>% select(
      unique_id,
      days_until_death_overall,
      censoring_thirtyday_mortality_overall,
      thirtyday_mortality_overall
    )
  ) %>%
  mutate(
    surv_days = ifelse(
      is.na(days_until_death_overall) &
        thirtyday_mortality_overall == "Survivor",
      censoring_thirtyday_mortality_overall,
      days_until_death_overall
    ),
    surv_days = ifelse(
      is.na(surv_days) &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    surv_days = ifelse(
      surv_days > 30 &
        thirtyday_mortality_overall == "Survivor",
      30,
      surv_days
    ),
    thirtyday_mortality_overall_class = ifelse(thirtyday_mortality_overall == "Survivor", 0, 1)
  ) %>%
  group_by(metabolomicsID) %>%
  dplyr::slice(1) %>%
  dplyr::rename(`Charlson Comorbidity Index` = cci_total_sc) %>%
  mutate(diet = ifelse(diet == "1", "Diet", "NPO")) %>% 
  dplyr::rename(
    `Sex` = "sex_factor",
    `Age` = "age",
    `Acute respiratory distress syndrome` = "ards_factor",
    `Sepsis` = "sepsis_factor",
    `SOFA Score` = "sofa_score_total",
    `Race` = "race_factor",
    `Time to stool sample` = "day_collected",
    `Diet` = "diet",
    `MDS` = "md_score"
  )

reset_gtsummary_theme()

coxauc_vc <-
  coxph(
    Surv(
      cox_df_vc$surv_days,
      cox_df_vc$thirtyday_mortality_overall_class
    ) ~
      `Sex` +
      `Age` +
      `Charlson Comorbidity Index` +
      `Acute respiratory distress syndrome` +
      `Sepsis` +
      `SOFA Score` +
      `Race` +
      `Time to stool sample` +
      `Diet` +
      `MDS`,
    data = cox_df_vc
  ) %>%
  tbl_regression(
    exp = TRUE,
    pvalue_fun = function(x) {
      if_else(is.na(x), NA_character_, if_else(
        x < 0.001,
        format(x,
          digits = 3, scientific = TRUE
        ),
        format(round(x, 3),
          scientific = F
        )
      ))
    }
  ) %>%
  modify_footnote(everything() ~ NA, abbreviation = TRUE)


coxauc_vc %>%
  gtsummary::modify_caption("**Cox Proportional Hazards Regression**")
Cox Proportional Hazards Regression
Characteristic HR 95% CI p-value
Sex
    Female
    Male 4.41 1.00, 19.5 0.051
Age 1.06 0.99, 1.12 0.080
Charlson Comorbidity Index 0.98 0.70, 1.38 0.906
Acute respiratory distress syndrome
    No
    Yes 6.14 0.92, 41.0 0.061
Sepsis
    None
    Sepsis 3.07 0.28, 33.1 0.356
SOFA Score 1.29 1.01, 1.65 0.038
Race
    African American
    Other 125 6.07, 2,570 0.002
    White, non-Hispanic 2.83 0.69, 11.7 0.149
Time to stool sample 0.68 0.44, 1.04 0.076
Diet
    Diet
    NPO 1.83 0.34, 9.85 0.480
MDS 1.31 0.89, 1.93 0.166
# In case you get an error: "Error in s$close() : attempt to apply non-function", run this code below:
# f <- chromote::default_chromote_object() #get the f object
# f$close()

gt::gtsave(gtsummary::as_gt(coxauc_vc), file = "./Results/cox_model_SOFA_30_Day_Mortality_roc_loop_validation.png")

Save Data Image

save.image(file = "./Data/MICU_Data_Anon.RData")